From 0a3842e05ee5ad37ffacefb70f685bbae3ad7fe6 Mon Sep 17 00:00:00 2001 From: Peter Verthez Date: Thu, 3 Jan 2002 10:50:43 +0000 Subject: [PATCH] Record can also have a value (e.g. the NOTE record): extra value needed on start record callback. --- ChangeLog | 13 + doc/usage.html | 722 +++++++++++++++++++++++---------------------- gedcom/interface.c | 7 +- gedcom/interface.h | 3 +- include/gedcom.h | 5 +- standalone.c | 48 ++- 6 files changed, 432 insertions(+), 366 deletions(-) diff --git a/ChangeLog b/ChangeLog index 3518e6f..d5ae1ac 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,18 @@ +2002-01-03 Peter Verthez + + * gedcom.y, gedcom.h, standalone.c, interface.*: Record can also + have a value (e.g. the NOTE record), so extra value necessary on + start record callback. + + * gedcom.y: Correct value of NOTE element (opt_line_item was missing + an action). + 2002-01-02 Peter Verthez + * lots of files: Pass the parsed tag value (integer) together with + the string value in the callbacks. For this, a separate header is + auto-generated, containing the #defines from bison. + * t/test_script: Return exit code from testgedcom. * gedcom/hash.*, gedcom/encoding.*: Use hash implementation from diff --git a/doc/usage.html b/doc/usage.html index 2843d7c..fbdcf1d 100644 --- a/doc/usage.html +++ b/doc/usage.html @@ -2,434 +2,450 @@ Using the GEDCOM parser library - + - +

Using the GEDCOM parser library

-
- +
+

Index

- + - -
+ +

Overview
-

- The GEDCOM parser library is built as a callback-based parser (comparable + + The GEDCOM parser library is built as a callback-based parser (comparable to the SAX interface of XML).  It comes with:
- +
    -
  • a library (libgedcom.so), to be linked in the application +
  • a library (libgedcom.so), to be linked in the application program
  • -
  • a header file (gedcom.h), to be used in the sources +
  • a header file (gedcom.h), to be used in the sources of the application program
  • -
  • a header file (gedcom-tags.h) that is also installed, +
  • a header file (gedcom-tags.h) that is also installed, but that is automatically included via gedcom.h
    -
  • - + +
- Next to these, there is also a data directory in $PREFIX/share/gedcom-parse - that contains some additional stuff, but which is not immediately important + Next to these, there is also a data directory in $PREFIX/share/gedcom-parse + that contains some additional stuff, but which is not immediately important at first.  I'll leave the description of the data directory for later.
-
- The very simplest call of the gedcom parser is simply the following piece - of code (include of the gedcom header is assumed, as everywhere in this -manual):
- -
int result;
- ...
- result = gedcom_parse_file("myfamily.ged");
-
- Although this will not provide much information, one thing it does is -parse the entire file and return the result.  The function returns 0 -on success and 1 on failure.  No other information is available using -this function only.

- The next sections will refine this to be able to have meaningful errors + The very simplest call of the gedcom parser is simply the following +piece of code (include of the gedcom header is assumed, as everywhere in +this manual):
+ +
int result;
+ ...
+ result = gedcom_parse_file("myfamily.ged");
+
+ Although this will not provide much information, one thing it does is + parse the entire file and return the result.  The function returns +0 on success and 1 on failure.  No other information is available using + this function only.
+
+ The next sections will refine this to be able to have meaningful errors and the actual data that is in the file.
- -
+ +

Error handling

- Since this is a relatively simple topic, it is discussed before the actual - callback mechanism, although it also uses a callback...
-
- The library can be used in several different circumstances, both terminal-based - as GUI-based.  Therefore, it leaves the actual display of the error -message up to the application.  For this, the application needs to register -a callback before parsing the GEDCOM file, which will be called by the library - on errors, warnings and messages.
-
- A typical piece of code would be:
- -
void my_message_handler (Gedcom_msg_type type, - char *msg)
- {
-   ...
- }
- ...
- gedcom_set_message_handler(my_message_handler);
- ...
- result = gedcom_parse_file("myfamily.ged");

-
- In the above piece of code, my_message_handler is the callback - that will be called for errors (type=ERROR), warnings ( - type=WARNING) and messages (type=MESSAGE).  The -callback must have the signature as in the example.  For errors, the - msg passed to the callback will have the format:
- -
Error on line <lineno>: <actual_message>
-
- Note that the entire string will be properly internationalized, and encoded - in UTF-8 (see "Why UTF-8?"  LINK TBD).  Also, no newline - is appended, so that the application program can use it in any way it wants. -  Warnings are similar, but use "Warning" instead of "Error".  Messages - are plain text, without any prefix.
-
- With this in place, the resulting code will already show errors and warnings - produced by the parser, e.g. on the terminal if a simple printf - is used in the message handler.
+ Since this is a relatively simple topic, it is discussed before the actual + callback mechanism, although it also uses a callback...
+
+ The library can be used in several different circumstances, both terminal-based + and GUI-based.  Therefore, it leaves the actual display of the error + message up to the application.  For this, the application needs to +register a callback before parsing the GEDCOM file, which will be called +by the library on errors, warnings and messages.
+
+ A typical piece of code would be:
+ +
void my_message_handler (Gedcom_msg_type type, + char *msg)
+ {
+   ...
+ }
+ ...
+ gedcom_set_message_handler(my_message_handler);
+ ...
+ result = gedcom_parse_file("myfamily.ged");

+
+ In the above piece of code, my_message_handler is the callback + that will be called for errors (type=ERROR), warnings ( + type=WARNING) and messages (type=MESSAGE).  The + callback must have the signature as in the example.  For errors, the + msg passed to the callback will have the format:
-
+
Error on line <lineno>: <actual_message>
+
+ Note that the entire string will be properly internationalized, and encoded + in UTF-8 (see "Why UTF-8?"  LINK TBD).  Also, no newline + is appended, so that the application program can use it in any way it wants. +  Warnings are similar, but use "Warning" instead of "Error".  Messages + are plain text, without any prefix.
+
+ With this in place, the resulting code will already show errors and warnings + produced by the parser, e.g. on the terminal if a simple printf + is used in the message handler.
+ +

Data callback mechanism

- The most important use of the parser is of course to get the data out -of the GEDCOM file.  As already mentioned, the parser uses a callback -mechanism for that.  In fact, the mechanism involves two levels.
-
- The primary level is that each of the sections in a GEDCOM file is notified - to the application code via a "start element" callback and an "end element" - callback (much like in a SAX interface for XML), i.e. when a line containing - a certain tag is parsed, the "start element" callback is called for that -tag, and when all its subordinate lines with their tags have been processed, -the "end element" callback is called for the original tag.  Since GEDCOM - is hierarchical, this results in properly nested calls to appropriate "start - element" and "end element" callbacks.
-
- However, it would be typical for a genealogy program to support only a -subset of the GEDCOM standard, certainly a program that is still under development. -  Moreover, under GEDCOM it is allowed for an application to define its - own tags, which will typically not  be supported by another application. -  Still, in that case, data preservation is important; it would hardly - be accepted that information that is not understood by a certain program -is just removed.
-
- Therefore, the second level of callbacks involves a "default callback". -  An application needs to subscribe to callbacks for tags it does support, - and need to provide a "default callback" which will be called for tags it - doesn't support.  The application can then choose to just store the + The most important use of the parser is of course to get the data out +of the GEDCOM file.  As already mentioned, the parser uses a callback + mechanism for that.  In fact, the mechanism involves two levels.
+
+ The primary level is that each of the sections in a GEDCOM file is notified + to the application code via a "start element" callback and an "end element" + callback (much like in a SAX interface for XML), i.e. when a line containing + a certain tag is parsed, the "start element" callback is called for that + tag, and when all its subordinate lines with their tags have been processed, + the "end element" callback is called for the original tag.  Since GEDCOM + is hierarchical, this results in properly nested calls to appropriate "start + element" and "end element" callbacks.
+
+ However, it would be typical for a genealogy program to support only +a subset of the GEDCOM standard, certainly a program that is still under +development.  Moreover, under GEDCOM it is allowed for an application +to define its own tags, which will typically not  be supported by another +application.  Still, in that case, data preservation is important; +it would hardly be accepted that information that is not understood by +a certain program is just removed.
+
+ Therefore, the second level of callbacks involves a "default callback". +  An application needs to subscribe to callbacks for tags it does support, + and needs to provide a "default callback" which will be called for tags it + doesn't support.  The application can then choose to just store the information that comes via the default callback in plain textual format.
-
- After this introduction, let's see what the API looks like...
-
- +
+ After this introduction, let's see what the API looks like...
+
+

Start and end callbacks

- +

Callbacks for records
-

- As a simple example, we will get some information from the header of a -GEDCOM file.  First, have a look at the following piece of code:
- -
Gedcom_ctxt my_header_start_cb (int level, - Gedcom_val xref, char *tag, int parsed_tag)
- {
-   printf("The header starts\n");
-   return (Gedcom_ctxt)1;
- }
-
- void my_header_end_cb (Gedcom_ctxt self)
- {
-   printf("The header ends, context is %d\n", self);   /* context - will print as "1" */
- }
-
- ...
- gedcom_subscribe_to_record(REC_HEAD, my_header_start_cb, + + As a simple example, we will get some information from the header of +a GEDCOM file.  First, have a look at the following piece of code:
+ +
Gedcom_ctxt my_header_start_cb (int level, +
+                      +          Gedcom_val xref,
+                      +          char *tag,
+                      +          char *raw_value,
+                      +          int parsed_tag,
+                      +          Gedcom_val parsed_value)
+ {
+   printf("The header starts\n");
+   return (Gedcom_ctxt)1;
+ }
+
+ void my_header_end_cb (Gedcom_ctxt self)
+ {
+   printf("The header ends, context is %d\n", (int)self);   /* context + will print as "1" */
+ }
+
+ ...
+ gedcom_subscribe_to_record(REC_HEAD, my_header_start_cb, my_header_end_cb);
- ...
- result = gedcom_parse_file("myfamily.ged");

-
- Using the gedcom_subscribe_to_record function, the application - requests to use the specified callbacks as start and end callback. The end - callback is optional: you can pass NULL if you are not interested - in the end callback.  The identifiers to use as first argument to the - function (here REC_HEAD) are described in the - interface details.
-
- From the name of the function it becomes clear that this function is specific - to complete records.  For the separate elements in records there is -another function, which we'll see shortly.  Again, the callbacks need -to have the signatures as shown in the example.
-
- The Gedcom_ctxt type that is used as a result of the start - callback and as an argument to the end callback is vital for passing context - necessary for the application.  This type is meant to be opaque; in -fact, it's a void pointer, so you can pass anything via it.  The important - thing to know is that the context that the application returns in the start - callback will be passed in the end callback as an argument, and as we will + ...
+ result = gedcom_parse_file("myfamily.ged");

+
+ Using the gedcom_subscribe_to_record function, the application + requests to use the specified callbacks as start and end callback. The +end callback is optional: you can pass NULL if you are not +interested in the end callback.  The identifiers to use as first argument +to the function (here REC_HEAD) are described in the + interface details.
+
+ From the name of the function it becomes clear that this function is +specific to complete records.  For the separate elements in records +there is another function, which we'll see shortly.  Again, the callbacks +need to have the signatures as shown in the example.
+
+ The Gedcom_ctxt type that is used as a result of the start + callback and as an argument to the end callback is vital for passing context + necessary for the application.  This type is meant to be opaque; in +fact, it's a void pointer, so you can pass anything via it.  The important + thing to know is that the context that the application returns in the start + callback will be passed in the end callback as an argument, and as we will see shortly, also to all the directly subordinate elements of the record.
-
-The tag is the GEDCOM tag in string format, the parsed_tag - is an integer, for which symbolic values are defined as TAG_HEAD, - TAG_SOUR, TAG_DATA, ... and USERTAG -for the application-specific tags.  These values are defined in the +
+ The tag is the GEDCOM tag in string format, the parsed_tag + is an integer, for which symbolic values are defined as TAG_HEAD, + TAG_SOUR, TAG_DATA, ... and USERTAG + for the application-specific tags.  These values are defined in the header gedcom-tags.h that is installed, and included via -gedcom.h (so no need to include gedcom-tags.h yourself).
-
- The example passes a simple integer as context, but an application could - e.g. pass a struct that will contain the information for the - header.  In the end callback, the application could then e.g. do some - finalizing operations on the struct to put it in its database.
-
- (Note that the Gedcom_val type for the xref -argument was not discussed, see further for this)
-
- + gedcom.h (so no need to include gedcom-tags.h yourself).
+
+ The example passes a simple integer as context, but an application could + e.g. pass a struct that will contain the information for the + header.  In the end callback, the application could then e.g. do some + finalizing operations on the struct to put it in its database.
+
+ (Note that the Gedcom_val type for the xref + and parsed_value arguments was not discussed, see further +for this)
+
+

Callbacks for elements

- We will now retrieve the SOUR field (the name of the program that wrote + We will now retrieve the SOUR field (the name of the program that wrote the file) from the header:
- -
Gedcom_ctxt my_header_source_start_cb(Gedcom_ctxt - parent,
-                     -                  int   + +
Gedcom_ctxt my_header_source_start_cb(Gedcom_ctxt + parent,
+                     +                  int         level,
-                     -                  char*   +                     +                  char*       tag,
-                     -                  char*   +                     +                  char*       raw_value,
-                      -                int     +                       +                int         parsed_tag,
-                     -                  Gedcom_val +                     +                  Gedcom_val  parsed_value)
- {
-   char *source = GEDCOM_STRING(parsed_value);
-   printf("This file was written by %s\n", source);
-   return parent;
- }
-
- void my_header_source_end_cb(Gedcom_ctxt parent,
-                     + {
+   char *source = GEDCOM_STRING(parsed_value);
+   printf("This file was written by %s\n", source);
+   return parent;
+ }
+
+ void my_header_source_end_cb(Gedcom_ctxt parent,
+                              Gedcom_ctxt self,
-                     +                              Gedcom_val  parsed_value)
- {
-   printf("End of the source description\n");
- }
-
- ...
- gedcom_subscribe_to_element(ELT_HEAD_SOUR,
-                     + {
+   printf("End of the source description\n");
+ }
+
+ ...
+ gedcom_subscribe_to_element(ELT_HEAD_SOUR,
+                             my_header_source_start_cb,
-                     +                             my_header_source_end_cb);
- ...
- result = gedcom_parse_file("myfamily.ged");

-
- The subscription mechanism for elements is similar, only the signatures - of the callbacks differ.  The signature for the start callback shows - that the context of the parent line (e.g. the struct that describes - the header) is passed to this start callback.  The callback itself returns - here the same context, but this can be its own context object of course. - The end callback is called with both the context of the parent and -the context of itself, which will be the same in the example.  Again, -the list of identifiers to use as a first argument for the subscription function -are detailed in the interface -details .
-
- If we look at the other arguments of the start callback, we see the level - number (the initial number of the line in the GEDCOM file), the tag (e.g. - "SOUR"), and then a raw value, a parsed tag and a parsed value.  The -raw value is just the raw string that occurs as value on the line next to -the tag (in UTF-8 encoding).  The parsed value is the meaningful value -that is parsed from that raw string.  The parsed tag is described in + ...
+ result = gedcom_parse_file("myfamily.ged");

+
+ The subscription mechanism for elements is similar, only the signatures + of the callbacks differ.  The signature for the start callback shows + that the context of the parent line (e.g. the struct that describes + the header) is passed to this start callback.  The callback itself +returns here the same context, but this can be its own context object of +course.  The end callback is called with both the context of the parent +and the context of itself, which will be the same in the example.  Again, + the list of identifiers to use as a first argument for the subscription +function is detailed in the +interface details .
+
+ If we look at the other arguments of the start callback, we see the level + number (the initial number of the line in the GEDCOM file), the tag (e.g. + "SOUR"), and then a raw value, a parsed tag and a parsed value.  The +raw value is just the raw string that occurs as value on the line next to +the tag (in UTF-8 encoding).  The parsed value is the meaningful value +that is parsed from that raw string.  The parsed tag is described in the section for record callbacks.
+
+ The Gedcom_val type is meant to be an opaque type.  The + only thing that needs to be known about it is that it can contain specific + data types, which have to be retrieved from it using pre-defined macros. +  These data types are described in the + interface details.

- The Gedcom_val type is meant to be an opaque type.  The - only thing that needs to be known about it is that it can contain specific - data types, which have to be retrieved from it using pre-defined macros. - These data types are described in the - interface details.
-
- Some extra notes:
- + Some extra notes:
+
    -
  • The Gedcom_val argument of the end callback - is currently not used.  It is there for future enhancements.
  • -
  • There is also a Gedcom_val argument in the - start callback for records.  This argument is currently a string value +
  • The Gedcom_val argument of the end callback + is currently not used.  It is there for future enhancements.
  • +
+ • There is also a Gedcom_val argument named xref in the + start callback for records.  This argument is currently a string value giving the pointer in string form.
  • - +
- +

Default callbacks
-

- As described above, an application doesn't always implement the entire -GEDCOM spec, and application-specific tags may have been added by other applications. -  To preserve this extra data anyway, a default callback can be registered - by the application, as in the following example:
- -
void my_default_cb (Gedcom_ctxt parent, + + As described above, an application doesn't always implement the entire + GEDCOM spec, and application-specific tags may have been added by other +applications.  To preserve this extra data anyway, a default callback +can be registered by the application, as in the following example:
+ +
void my_default_cb (Gedcom_ctxt parent, int level, char* tag, char* raw_value, int parsed_tag)
- {
-   ...
- }
-
- ...
- gedcom_set_default_callback(my_default_cb);
- ...
- result = gedcom_parse_file("myfamily.ged");

-
- This callback has a similar signature as the previous ones, -but it doesn't contain a parsed value.  However, it does contain the -parent context, that was returned by the application for the most specific + {
+   ...
+ }
+
+ ...
+ gedcom_set_default_callback(my_default_cb);
+ ...
+ result = gedcom_parse_file("myfamily.ged");

+
+ This callback has a similar signature to the previous ones, +but it doesn't contain a parsed value.  However, it does contain the +parent context that was returned by the application for the most specific containing tag that the application supported.
-
- Suppose e.g. that this callback is called for some tags in the header that - are specific to some other application, then our application could make -sure that the parent context contains the struct or object that represents -the header, and use the default callback here to add the level, tag and -raw_value as plain text in a member of that struct or object, thus preserving -the information.  The application can then write this out when the -data is saved again in a GEDCOM file.  To make it more specific, consider +
+ Suppose e.g. that this callback is called for some tags in the header +that are specific to some other application, then our application could +make sure that the parent context contains the struct or object that represents +the header, and use the default callback here to add the level, tag and raw_value + as plain text in a member of that struct or object, thus preserving the +information.  The application can then write this out when the data +is saved again in a GEDCOM file.  To make it more specific, consider the following example:
- +
struct header {
-   char* source;
-   ...
-   char* extra_text;
- };
-
- Gedcom_ctxt my_header_start_cb(int level, Gedcom_val xref, char* tag, int -parsed_tag)
- {
-   struct header head = my_make_header_struct();
-   return (Gedcom_ctxt)head;
- }
-
- void my_default_cb(Gedcom_ctxt parent, int level, char* tag, char* raw_value, +   char* source;
+   ...
+   char* extra_text;
+ };
+
+ Gedcom_ctxt my_header_start_cb(int level, Gedcom_val xref, char* tag, +char *raw_value,
+                      +         int parsed_tag, Gedcom_val parsed_value)
+ {
+   struct header* head = my_make_header_struct();
+   return (Gedcom_ctxt)head;
+ }
+
+ void my_default_cb(Gedcom_ctxt parent, int level, char* tag, char* raw_value, int parsed_tag)
- {
-   struct header head = (struct header)parent;
-   my_header_add_to_extra_text(head, level, tag, raw_value);
- }
-
- gedcom_set_default_callback(my_default_cb);
- gedcom_subscribe_to_record(REC_HEAD, my_header_start, NULL);
- ...
- result = gedcom_parse_file(filename);

-
- Note that the default callback will be called for any tag that isn't specifically - subscribed upon by the application, and can thus be called in various contexts. -  For simplicity, the example above doesn't take this into account (the - parent could be of different types, depending + {
+   struct header* head = (struct header*)parent;
+   my_header_add_to_extra_text(head, level, tag, raw_value);
+ }
+
+ gedcom_set_default_callback(my_default_cb);
+ gedcom_subscribe_to_record(REC_HEAD, my_header_start_cb, NULL);
+ ...
+ result = gedcom_parse_file(filename);
+ + Note that the default callback will be called for any tag that isn't specifically + subscribed to by the application, and can thus be called in various contexts. +  For simplicity, the example above doesn't take this into account (the + parent could be of different types, depending on the context).
- -
+ +
+

Other API functions
-

- Although the above describes the basic interface of libgedcom, there are - some other functions that allow to customize the behaviour of the library. + + Although the above describes the basic interface of libgedcom, there are + some other functions that allow to customize the behaviour of the library.  These will be explained in the current section.
- +

Debugging

- The library can generate various debugging output, not only from itself, - but also the debugging output generated by the yacc parser.  By default, - no debugging output is generated, but this can be customized using the following + The library can generate various debugging output, not only from itself, + but also the debugging output generated by the yacc parser.  By default, + no debugging output is generated, but this can be customized using the following function:
- -
void gedcom_set_debug_level (int level, + +
void gedcom_set_debug_level (int level, FILE* trace_output)
-
- The level can be one of the following values:
- +
+ The level can be one of the following values:
+
    -
  • 0:  no debugging information (this is the default)
  • -
  • 1:  only debugging information from libgedcom +
  • 0:  no debugging information (this is the default)
  • +
  • 1:  only debugging information from libgedcom itself
  • -
  • 2:  debugging information from libgedcom and -yacc
  • - +
  • 2:  debugging information from libgedcom and + yacc
  • +
- If the trace_output is NULL, debugging information - will be written to stderr, otherwise the given file handle -is used (which must be open).
-
- + If the trace_output is NULL, debugging information + will be written to stderr, otherwise the given file handle is + used (which must be open).
+
+

Error treatment

- One of the previous sections already described the callback to be registered - to get error messages.  The library also allows to customize what happens + One of the previous sections already described the callback to be registered + to get error messages.  The library also allows to customize what happens on an error, using the following function:
- -
void gedcom_set_error_handling (Gedcom_err_mech + +
void gedcom_set_error_handling (Gedcom_err_mech mechanism)
-
- The mechanism can be one of:
- +
+ The mechanism can be one of:
+
    -
  • IMMED_FAIL: immediately fail the parsing - on an error (this is the default)
  • -
  • DEFER_FAIL: continue parsing after -an error, but return a failure code eventually
  • -
  • IGNORE_ERRORS: continue parsing after +
  • IMMED_FAIL: immediately fail the +parsing on an error (this is the default)
  • +
  • DEFER_FAIL: continue parsing after + an error, but return a failure code eventually
  • +
  • IGNORE_ERRORS: continue parsing after an error, return success always
  • - +
- This doesn't influence the generation of error or warning messages, only + This doesn't influence the generation of error or warning messages, only the behaviour of the parser and its return code.
-
- +
+

Compatibility mode
-

- Applications are not necessarily true to the GEDCOM spec (or use a different - version than 5.5).  The intention is that the library is resilient -to this, and goes in compatibility mode for files written by specific programs - (detected via the HEAD.SOUR tag).  This compatibility mode can be enabled + + Applications are not necessarily true to the GEDCOM spec (or use a different + version than 5.5).  The intention is that the library is resilient to + this, and goes in compatibility mode for files written by specific programs + (detected via the HEAD.SOUR tag).  This compatibility mode can be enabled and disabled via the following function:
- +
void gedcom_set_compat_handling - (int enable_compat)
-
- The argument can be:
- + (int enable_compat)

+ + The argument can be:
+
    -
  • 0: disable compatibility mode
  • -
  • 1: allow compatibility mode (this is the default)
    -
  • - +
  • 0: disable compatibility mode
  • +
  • 1: allow compatibility mode (this is the default)
    +
  • +
- Note that, currently, no actual compatibility code is present, but this -is on the to-do list.
- -
+ Note that, currently, no actual compatibility code is present, but this + is on the to-do list.
+ +
+
$Id$
$Name$
- +
                    
- + + diff --git a/gedcom/interface.c b/gedcom/interface.c index 8a69ffe..152599a 100644 --- a/gedcom/interface.c +++ b/gedcom/interface.c @@ -56,11 +56,12 @@ void gedcom_subscribe_to_element(Gedcom_elt elt, } Gedcom_ctxt start_record(Gedcom_rec rec, - int level, Gedcom_val xref, struct tag_struct tag) + int level, Gedcom_val xref, struct tag_struct tag, + char *raw_value, Gedcom_val parsed_value) { Gedcom_rec_start_cb cb = record_start_callback[rec]; if (cb != NULL) - return (*cb)(level, xref, tag.string, tag.value); + return (*cb)(level, xref, tag.string, raw_value, tag.value, parsed_value); else return NULL; } @@ -81,7 +82,7 @@ Gedcom_ctxt start_element(Gedcom_elt elt, Gedcom_ctxt parent, if (cb != NULL) ctxt = (*cb)(parent, level, tag.string, raw_value, tag.value, parsed_value); - else if (default_cb != NULL) + else if (default_cb != NULL && parent != NULL) (*default_cb)(parent, level, tag.string, raw_value, tag.value); return ctxt; } diff --git a/gedcom/interface.h b/gedcom/interface.h index 6eb0ed6..19498fa 100644 --- a/gedcom/interface.h +++ b/gedcom/interface.h @@ -27,7 +27,8 @@ #include "gedcom.h" Gedcom_ctxt start_record(Gedcom_rec rec, - int level, Gedcom_val xref, struct tag_struct tag); + int level, Gedcom_val xref, struct tag_struct tag, + char *raw_value, Gedcom_val parsed_value); void end_record(Gedcom_rec rec, Gedcom_ctxt self); Gedcom_ctxt start_element(Gedcom_elt elt, Gedcom_ctxt parent, diff --git a/include/gedcom.h b/include/gedcom.h index a7a64c8..495beb8 100644 --- a/include/gedcom.h +++ b/include/gedcom.h @@ -363,7 +363,7 @@ typedef Gedcom_val_struct* Gedcom_val; /* This returns the char* from a Gedcom_val, if appropriate */ /* It gives a gedcom_warning if the cast is not correct */ #define GEDCOM_STRING(VAL) \ - GV_CHECK_CAST(VAL, GV_CHAR_PTR, string_val, "") + GV_CHECK_CAST(VAL, GV_CHAR_PTR, string_val, "") #define GEDCOM_IS_STRING(VAL) \ GV_IS_TYPE(VAL, GV_CHAR_PTR) @@ -380,7 +380,8 @@ typedef void typedef Gedcom_ctxt 
(*Gedcom_rec_start_cb) - (int level, Gedcom_val xref, char *tag, int tag_value); + (int level, Gedcom_val xref, char *tag, char *raw_value, + int tag_value, Gedcom_val parsed_value); typedef void (*Gedcom_rec_end_cb) (Gedcom_ctxt self); diff --git a/standalone.c b/standalone.c index 524dfac..9f435f5 100644 --- a/standalone.c +++ b/standalone.c @@ -59,7 +59,9 @@ void show_help () printf(" -3 Run the test parse 3 times instead of once\n"); } -Gedcom_ctxt header_start(int level, Gedcom_val xref, char *tag, int tag_value) +Gedcom_ctxt header_start(int level, Gedcom_val xref, char *tag, + char *raw_value, int tag_value, + Gedcom_val parsed_value) { output(1, "Header start\n"); return (Gedcom_ctxt)0; @@ -73,19 +75,44 @@ void header_end(Gedcom_ctxt self) char family_xreftags[100][255]; int family_nr = 0; -Gedcom_ctxt family_start(int level, Gedcom_val xref, char *tag, int tag_value) +Gedcom_ctxt family_start(int level, Gedcom_val xref, char *tag, + char *raw_value, int tag_value, + Gedcom_val parsed_value) { output(1, "Family start, xref is %s\n", GEDCOM_STRING(xref)); strcpy(family_xreftags[family_nr], GEDCOM_STRING(xref)); return (Gedcom_ctxt)(family_nr++); } +Gedcom_ctxt rec_start(int level, Gedcom_val xref, char *tag, + char *raw_value, int tag_value, + Gedcom_val parsed_value) +{ + char *xref_str = NULL; + if (! 
GEDCOM_IS_NULL(xref)) + xref_str = GEDCOM_STRING(xref); + output(1, "Rec %s start, xref is %s\n", tag, xref_str); + return (Gedcom_ctxt)tag_value; +} + +Gedcom_ctxt note_start(int level, Gedcom_val xref, char *tag, + char *raw_value, int tag_value, + Gedcom_val parsed_value) +{ + output(0, "== %d %s (%d) %s (xref is %s)\n", + level, tag, tag_value, GEDCOM_STRING(parsed_value), + GEDCOM_STRING(xref)); + return (Gedcom_ctxt)tag_value; +} + void family_end(Gedcom_ctxt self) { output(1, "Family end, xref is %s\n", family_xreftags[(int)self]); } -Gedcom_ctxt submit_start(int level, Gedcom_val xref, char *tag, int tag_value) +Gedcom_ctxt submit_start(int level, Gedcom_val xref, char *tag, + char *raw_value, int tag_value, + Gedcom_val parsed_value) { output(1, "Submitter, xref is %s\n", GEDCOM_STRING(xref)); return (Gedcom_ctxt)10000; @@ -147,7 +174,14 @@ void subscribe_callbacks() { gedcom_subscribe_to_record(REC_HEAD, header_start, header_end); gedcom_subscribe_to_record(REC_FAM, family_start, family_end); + gedcom_subscribe_to_record(REC_INDI, rec_start, NULL); + gedcom_subscribe_to_record(REC_OBJE, rec_start, NULL); + gedcom_subscribe_to_record(REC_NOTE, note_start, NULL); + gedcom_subscribe_to_record(REC_REPO, rec_start, NULL); + gedcom_subscribe_to_record(REC_SOUR, rec_start, NULL); + gedcom_subscribe_to_record(REC_SUBN, rec_start, NULL); gedcom_subscribe_to_record(REC_SUBM, submit_start, NULL); + gedcom_subscribe_to_record(REC_USER, rec_start, NULL); gedcom_subscribe_to_element(ELT_HEAD_SOUR, source_start, source_end); gedcom_subscribe_to_element(ELT_SOUR_DATA_EVEN_DATE, source_date_start, NULL); @@ -156,12 +190,12 @@ void subscribe_callbacks() void gedcom_message_handler(Gedcom_msg_type type, char *msg) { if (type == MESSAGE) - fprintf(stderr, "MESSAGE: "); + output(1, "MESSAGE: "); else if (type == WARNING) - fprintf(stderr, "WARNING: "); + output(1, "WARNING: "); else if (type == ERROR) - fprintf(stderr, "ERROR: "); - fprintf(stderr, "%s\n", msg); + output(1, 
"ERROR: "); + output(1, "%s\n", msg); } int main(int argc, char* argv[]) -- 2.30.2