From: Peter Verthez Date: Sat, 24 Nov 2001 22:24:27 +0000 (+0000) Subject: Conversion from ANSEL to Unicode (iconv module). X-Git-Url: https://git.dlugolecki.net.pl/?a=commitdiff_plain;h=4b4a604a9dd1f072d7ede2bdda7eb563c66010f8;p=gedcom-parse.git Conversion from ANSEL to Unicode (iconv module). --- diff --git a/ansel/ANSI_Z39.47.c b/ansel/ANSI_Z39.47.c new file mode 100644 index 0000000..5826d39 --- /dev/null +++ b/ansel/ANSI_Z39.47.c @@ -0,0 +1,666 @@ +/* Generic conversion to and from ANSI Z39.47 (also known as ANSEL) + Based on the ansi_x3.110.c file from the glibc sources + Data coming from: + http://lcweb.loc.gov/marc/specifications/speccharlatin.html + + Note: in ANSEL, diacritical marks come *before* the base character; + in Unicode, they come *after*... +*/ + +#include +#include +#include +#include +#include + +static const uint32_t to_ucs4[256] = +{ + /* 0x00 */ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + /* 0x08 */ 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, + /* 0x10 */ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, + /* 0x18 */ 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, + /* 0x20 */ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, + /* 0x28 */ 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, + /* 0x30 */ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, + /* 0x38 */ 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, + /* 0x40 */ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, + /* 0x48 */ 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, + /* 0x50 */ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, + /* 0x58 */ 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, + /* 0x60 */ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, + /* 0x68 */ 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, + /* 0x70 */ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, + /* 0x78 */ 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, + + /* 0x80 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x88 */ 0x0088, 0x0089, 0x0000, 0x0000, 0x0000, 0x200d, 0x200c, 0x0000, + /* 0x90 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x98 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0xa0 */ 0x0000, 0x0141, 0x00d8, 0x0110, 0x00de, 0x00c6, 0x0152, 0x02b9, + /* 0xa8 */ 0x00b7, 0x266d, 0x00ae, 0x00b1, 0x01a0, 0x01af, 0x02be, 0x0000, + /* 0xb0 */ 0x02bb, 0x0142, 0x00f8, 0x0111, 0x00fe, 0x00e6, 0x0153, 0x02ba, + /* 0xb8 */ 0x0131, 0x00a3, 0x00f0, 0x0000, 0x01a1, 0x01b0, 0x0000, 0x0000, + /* 0xc0 */ 0x00b0, 0x2113, 0x2117, 0x00a9, 0x266f, 0x00bf, 0x00a1, 0x0000, + /* 0xc8 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x00df, + /* 0xd0 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0xd8 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0xe0 */ 0x0309, 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0306, 0x0307, + /* 0xe8 */ 0x0308, 0x030c, 0x030a, 0xfe20, 0xfe21, 0x0315, 0x030b, 0x0310, + /* 0xf0 */ 0x0327, 0x0328, 0x0323, 0x0324, 0x0325, 0x0333, 0x0332, 0x0326, + /* 0xf8 */ 0x031c, 0x032e, 0xfe22, 0xfe23, 0x0000, 0x0000, 0x0313, 0x0000 +}; + +/* The outer array range runs from 0xe0 to 0xfe, the inner range from 0x20 + to 0x7f. */ +static const uint32_t to_ucs4_comb[31][96] = +{ + /* 0xe0 (hook above) */ + { + /* 0x20 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x28 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x30 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x38 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x40 */ 0x0000, 0x1ea2, 0x0000, 0x0000, 0x0000, 0x1eba, 0x0000, 0x0000, + /* 0x48 */ 0x0000, 0x1ec8, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x1ece, + /* 0x50 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x1ee6, 0x0000, 0x0000, + /* 0x58 */ 0x0000, 0x1ef6, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x60 */ 0x0000, 0x1ea3, 0x0000, 0x0000, 0x0000, 0x1ebb, 0x0000, 0x0000, + /* 0x68 */ 0x0000, 0x1ec9, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x1ecf, + /* 0x70 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x1ee7, 0x0000, 0x0000, + /* 0x78 */ 0x0000, 0x1ef7, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 + }, + /* 0xe1 (grave) */ + { + /* 0x20 */ 0x0060, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x28 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x30 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x38 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x40 */ 0x0000, 0x00c0, 0x0000, 0x0000, 0x0000, 0x00c8, 0x0000, 0x0000, + /* 0x48 */ 0x0000, 0x00cc, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x00d2, + /* 0x50 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x00d9, 0x0000, 0x0000, + /* 0x58 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x60 */ 0x0000, 0x00e0, 0x0000, 0x0000, 0x0000, 0x00e8, 0x0000, 0x0000, + /* 0x68 */ 0x0000, 0x00ec, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x00f2, + /* 0x70 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x00f9, 0x0000, 0x0000, + /* 0x78 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 + }, + /* 0xe2 (acute) */ + { + /* 0x20 */ 0x00b4, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x28 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x30 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x38 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x40 */ 0x0000, 0x00c1, 0x0000, 0x0106, 0x0000, 0x00c9, 0x0000, 0x0000, + /* 0x48 */ 0x0000, 0x00cd, 0x0000, 0x0000, 0x0139, 0x0000, 0x0143, 0x00d3, + /* 0x50 */ 0x0000, 0x0000, 0x0154, 0x015a, 0x0000, 0x00da, 0x0000, 0x0000, + /* 0x58 */ 0x0000, 0x00dd, 0x0179, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x60 */ 0x0000, 0x00e1, 0x0000, 0x0107, 0x0000, 0x00e9, 0x0000, 0x0000, + /* 0x68 */ 0x0000, 0x00ed, 0x0000, 0x0000, 0x013a, 0x0000, 0x0144, 0x00f3, + /* 0x70 */ 0x0000, 0x0000, 0x0155, 0x015b, 0x0000, 0x00fa, 0x0000, 0x0000, + /* 0x78 */ 0x0000, 0x00fd, 0x017a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 + }, + /* 0xe3 (circumflex) */ + { + /* 0x20 */ 0x005e, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x28 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x30 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x38 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x40 */ 0x0000, 0x00c2, 0x0000, 0x0108, 0x0000, 0x00ca, 0x0000, 0x011c, + /* 0x48 */ 0x0124, 0x00ce, 0x0134, 0x0000, 0x0000, 0x0000, 0x0000, 0x00d4, + /* 0x50 */ 0x0000, 0x0000, 0x0000, 0x015c, 0x0000, 0x00db, 0x0000, 0x0174, + /* 0x58 */ 0x0000, 0x0176, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x60 */ 0x0000, 0x00e2, 0x0000, 0x0109, 0x0000, 0x00ea, 0x0000, 0x011d, + /* 0x68 */ 0x0125, 0x00ee, 0x0135, 0x0000, 0x0000, 0x0000, 0x0000, 0x00f4, + /* 0x70 */ 0x0000, 0x0000, 0x0000, 0x015d, 0x0000, 0x00fb, 0x0000, 0x0175, + /* 0x78 */ 0x0000, 0x0177, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 + }, + /* 0xe4 (tilde) */ + { + /* 0x20 */ 0x007e, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x28 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x30 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x38 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x40 */ 0x0000, 0x00c3, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x48 */ 0x0000, 0x0128, 0x0000, 0x0000, 0x0000, 0x0000, 0x00d1, 0x00d5, + /* 0x50 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0168, 0x0000, 0x0000, + /* 0x58 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x60 */ 0x0000, 0x00e3, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x68 */ 0x0000, 0x0129, 0x0000, 0x0000, 0x0000, 0x0000, 0x00f1, 0x00f5, + /* 0x70 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0169, 0x0000, 0x0000, + /* 0x78 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 + }, + /* 0xe5 (macron) */ + { + /* 0x20 */ 0x00af, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x28 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x30 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x38 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x40 */ 0x0000, 0x0100, 0x0000, 0x0000, 0x0000, 0x0112, 0x0000, 0x0000, + /* 0x48 */ 0x0000, 0x012a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x014c, + /* 0x50 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x016a, 0x0000, 0x0000, + /* 0x58 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x60 */ 0x0000, 0x0101, 0x0000, 0x0000, 0x0000, 0x0113, 0x0000, 0x0000, + /* 0x68 */ 0x0000, 0x012b, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x014d, + /* 0x70 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x016b, 0x0000, 0x0000, + /* 0x78 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 + }, + /* 0xe6 (breve) */ + { + /* 0x20 */ 0x02d8, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x28 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x30 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x38 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x40 */ 0x0000, 0x0102, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x011e, + /* 0x48 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x50 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x016c, 0x0000, 0x0000, + /* 0x58 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x60 */ 0x0000, 0x0103, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x011f, + /* 0x68 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x70 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x016d, 0x0000, 0x0000, + /* 0x78 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 + }, + /* 0xe7 (dot above) */ + { + /* 0x20 */ 0x02d9, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x28 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x30 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x38 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x40 */ 0x0000, 0x0226, 0x0000, 0x010a, 0x0000, 0x0116, 0x0000, 0x0120, + /* 0x48 */ 0x0000, 0x0130, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x022e, + /* 0x50 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x58 */ 0x0000, 0x0000, 0x017b, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x60 */ 0x0000, 0x0227, 0x0000, 0x010b, 0x0000, 0x0117, 0x0000, 0x0121, + /* 0x68 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x022f, + /* 0x70 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x78 */ 0x0000, 0x0000, 0x017c, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 + }, + /* 0xe8 (umlaut, diaeresis) */ + { + /* 0x20 */ 0x00a8, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x28 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x30 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x38 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x40 */ 0x0000, 0x00c4, 0x0000, 0x0000, 0x0000, 0x00cb, 0x0000, 0x0000, + /* 0x48 */ 0x0000, 0x00cf, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x00d6, + /* 0x50 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x00dc, 0x0000, 0x0000, + /* 0x58 */ 0x0000, 0x0178, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x60 */ 0x0000, 0x00e4, 0x0000, 0x0000, 0x0000, 0x00eb, 0x0000, 0x0000, + /* 0x68 */ 0x0000, 0x00ef, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x00f6, + /* 0x70 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x00fc, 0x0000, 0x0000, + /* 0x78 */ 0x0000, 0x00ff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 + }, + /* 0xe9 (caron, hacek) */ + { + /* 0x20 */ 0x02c7, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x28 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x30 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x38 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x40 */ 0x0000, 0x0000, 0x0000, 0x010c, 0x010e, 0x011a, 0x0000, 0x0000, + /* 0x48 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x013d, 0x0000, 0x0147, 0x0000, + /* 0x50 */ 0x0000, 0x0000, 0x0158, 0x0160, 0x0164, 0x0000, 0x0000, 0x0000, + /* 0x58 */ 0x0000, 0x0000, 0x017d, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x60 */ 0x0000, 0x0000, 0x0000, 0x010d, 0x010f, 0x011b, 0x0000, 0x0000, + /* 0x68 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x013e, 0x0000, 0x0148, 0x0000, + /* 0x70 */ 0x0000, 0x0000, 0x0159, 0x0161, 0x0165, 0x0000, 0x0000, 0x0000, + /* 0x78 */ 0x0000, 0x0000, 0x017e, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 + }, + /* 0xea (ring above) */ + { + /* 0x20 */ 0x02da, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x28 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x30 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x38 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x40 */ 0x0000, 0x00c5, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x48 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x50 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x016e, 0x0000, 0x0000, + /* 0x58 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x60 */ 0x0000, 0x00e5, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x68 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x70 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x016f, 0x0000, 0x0000, + /* 0x78 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 + }, + /* 0xeb (ligature, left half) */ + { + 0x0000, + }, + /* 0xec (ligature, right half) */ + { + 0x0000, + }, + /* 0xed (comma above right) */ + { + 0x0000, + }, + /* 0xee (double acute) */ + { + /* 0x20 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x28 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x30 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x38 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x40 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x48 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0150, + /* 0x50 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0170, 0x0000, 0x0000, + /* 0x58 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x60 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x68 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0151, + /* 0x70 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0171, 0x0000, 0x0000, + /* 0x78 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 + }, + /* 0xef (candrabindu) */ + { + 0x0000, + }, + /* 0xf0 (cedilla) */ + { + /* 0x20 */ 0x00b8, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x28 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x30 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x38 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x40 */ 0x0000, 0x0000, 0x0000, 0x00c7, 0x0000, 0x0000, 0x0000, 0x0122, + /* 0x48 */ 0x0000, 0x0000, 0x0000, 0x0136, 0x013b, 0x0000, 0x0145, 0x0000, + /* 0x50 */ 0x0000, 0x0000, 0x0156, 0x015e, 0x0162, 0x0000, 0x0000, 0x0000, + /* 0x58 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x60 */ 0x0000, 0x0000, 0x0000, 0x00e7, 0x0000, 0x0000, 0x0000, 0x0123, + /* 0x68 */ 0x0000, 0x0000, 0x0000, 0x0137, 0x013c, 0x0000, 0x0146, 0x0000, + /* 0x70 */ 0x0000, 0x0000, 0x0157, 0x015f, 0x0163, 0x0000, 0x0000, 0x0000, + /* 0x78 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 + }, + /* 0xf1 (ogonek, right hook) */ + { + /* 0x20 */ 0x02db, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x28 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x30 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x38 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x40 */ 0x0000, 0x0104, 0x0000, 0x0000, 0x0000, 0x0118, 0x0000, 0x0000, + /* 0x48 */ 0x0000, 0x012e, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x50 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0172, 0x0000, 0x0000, + /* 0x58 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x60 */ 0x0000, 0x0105, 0x0000, 0x0000, 0x0000, 0x0119, 0x0000, 0x0000, + /* 0x68 */ 0x0000, 0x012f, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 0x70 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0173, 0x0000, 0x0000, + /* 0x78 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 + }, + /* 0xf2 (dot below) */ + { + 0x0000, + }, + /* 0xf3 (double dot below) */ + { + 0x0000, + }, + /* 0xf4 (ring below) */ + { + 0x0000, + }, + /* 0xf5 (double low line) */ + { + 0x0000, + }, + /* 0xf6 (line below) */ + { + 0x0000, + }, + /* 0xf7 (comma below, left hook) */ + { + 0x0000, + }, + /* 0xf8 (left half ring below, right cedilla) */ + { + 0x0000, + }, + /* 0xf9 (breve below, half circle below) */ + { + 0x0000, + }, + /* 0xfa (double tilde, left half) */ + { + 0x0000, + }, + /* 0xfb (double tilde, right half) */ + { + 0x0000, + }, + /* 0xfc */ + { + 0x0000, + }, + /* 0xfd */ + { + 0x0000, + }, + /* 0xfe (comma above, high centered comma) */ + { + 0x0000, + }, +}; + + +static const char from_ucs4[][2] = +{ + /* 0x0000 */ "\x00\x00", "\x01\x00", "\x02\x00", "\x03\x00", "\x04\x00", + /* 0x0005 */ "\x05\x00", "\x06\x00", "\x07\x00", "\x08\x00", "\x09\x00", + /* 0x000a */ "\x0a\x00", "\x0b\x00", "\x0c\x00", "\x0d\x00", "\x0e\x00", + /* 0x000f */ "\x0f\x00", "\x10\x00", "\x11\x00", "\x12\x00", "\x13\x00", + /* 0x0014 */ "\x14\x00", "\x15\x00", "\x16\x00", "\x17\x00", "\x18\x00", + /* 0x0019 */ "\x19\x00", "\x1a\x00", "\x1b\x00", "\x1c\x00", "\x1d\x00", + /* 0x001e */ "\x1e\x00", "\x1f\x00", "\x20\x00", "\x21\x00", "\x22\x00", + /* 0x0023 */ "\xa6\x00", "\xa4\x00", "\x25\x00", "\x26\x00", "\x27\x00", + /* 0x0028 */ "\x28\x00", "\x29\x00", "\x2a\x00", "\x2b\x00", "\x2c\x00", + /* 0x002d */ "\x2d\x00", "\x2e\x00", "\x2f\x00", "\x30\x00", "\x31\x00", + /* 0x0032 */ "\x32\x00", "\x33\x00", "\x34\x00", "\x35\x00", "\x36\x00", + /* 0x0037 */ "\x37\x00", "\x38\x00", "\x39\x00", "\x3a\x00", "\x3b\x00", + /* 0x003c */ "\x3c\x00", "\x3d\x00", "\x3e\x00", "\x3f\x00", "\x40\x00", + /* 0x0041 */ "\x41\x00", "\x42\x00", "\x43\x00", "\x44\x00", "\x45\x00", + /* 0x0046 */ "\x46\x00", "\x47\x00", "\x48\x00", "\x49\x00", "\x4a\x00", + /* 0x004b */ "\x4b\x00", "\x4c\x00", "\x4d\x00", "\x4e\x00", "\x4f\x00", + /* 0x0050 */ "\x50\x00", "\x51\x00", "\x52\x00", "\x53\x00", "\x54\x00", + /* 0x0055 */ "\x55\x00", "\x56\x00", "\x57\x00", "\x58\x00", "\x59\x00", + /* 0x005a */ "\x5a\x00", "\x5b\x00", "\x5c\x00", "\x5d\x00", "\x5e\x00", + /* 0x005f */ "\x5f\x00", "\x60\x00", "\x61\x00", "\x62\x00", "\x63\x00", + /* 0x0064 */ "\x64\x00", "\x65\x00", "\x66\x00", "\x67\x00", "\x68\x00", + /* 0x0069 */ "\x69\x00", "\x6a\x00", "\x6b\x00", "\x6c\x00", "\x6d\x00", + /* 0x006e */ "\x6e\x00", "\x6f\x00", "\x70\x00", "\x71\x00", "\x72\x00", + /* 0x0073 */ "\x73\x00", "\x74\x00", "\x75\x00", "\x76\x00", "\x77\x00", + /* 0x0078 */ "\x78\x00", "\x79\x00", "\x7a\x00", "\x7b\x00", "\x7c\x00", + /* 0x007d */ "\x7d\x00", "\x7e\x00", "\x7f\x00", "\x00\x00", "\x00\x00", + /* 0x0082 */ "\x00\x00", "\x00\x00", "\x00\x00", "\x00\x00", "\x00\x00", + /* 0x0087 */ "\x00\x00", "\x88\x00", "\x89\x00", "\x00\x00", "\x00\x00", + /* 0x008c */ "\x00\x00", "\x00\x00", "\x00\x00", "\x00\x00", "\x00\x00", + /* 0x0091 */ "\x00\x00", "\x00\x00", "\x00\x00", "\x00\x00", "\x00\x00", + /* 0x0096 */ "\x00\x00", "\x00\x00", "\x00\x00", "\x00\x00", "\x00\x00", + /* 0x009b */ "\x00\x00", "\x00\x00", "\x00\x00", "\x00\x00", "\x00\x00", + /* 0x00a0 */ "\x00\x00", "\xc6\x00", "\x00\x00", "\xb9\x00", "\x00\x00", + /* 0x00a5 */ "\x00\x00", "\x00\x00", "\x00\x00", "\x00\x20", "\xc3\x00", + /* 0x00aa */ "\x00\x00", "\x00\x00", "\x00\x00", "\x00\x00", "\xaa\x00", + /* 0x00af */ "\x00\x20", "\xc0\x00", "\xab\x00", "\x00\x00", "\x00\x00", + /* 0x00b4 */ "\x00\x20", "\x00\x00", "\x00\x00", "\xa8\x00", "\x00\x00", + /* 0x00b9 */ "\x00\x00", "\x00\x00", "\x00\x00", "\x00\x00", "\x00\x00", + /* 0x00be */ "\x00\x00", "\xc5\x00", "\xe1\x41", "\xe2\x41", "\xe3\x41", + /* 0x00c3 */ "\xe4\x41", "\xe8\x41", "\xea\x41", "\xa5\x00", "\xf0\x43", + /* 0x00c8 */ "\xe1\x45", "\xe2\x45", "\xe3\x45", "\xe8\x45", "\xe1\x49", + /* 0x00cd */ "\xe2\x49", "\xe3\x49", "\xe8\x49", "\xa3\x00", "\xe4\x4e", + /* 0x00d2 */ "\xe1\x4f", "\xe2\x4f", "\xce\x4f", "\xe4\x4f", "\xe8\x4f", + /* 0x00d7 */ "\x00\x00", "\xa2\x00", "\xe1\x55", "\xe2\x55", "\xe3\x55", + /* 0x00dc */ "\xe8\x55", "\xe2\x59", "\xa4\x00", "\xc8\x00", "\xe1\x61", + /* 0x00e1 */ "\xe2\x61", "\xe3\x61", "\xe4\x61", "\xe8\x61", "\xea\x61", + /* 0x00e6 */ "\xb5\x00", "\xf0\x63", "\xe1\x65", "\xe2\x65", "\xe3\x65", + /* 0x00eb */ "\xe8\x65", "\xe1\x69", "\xe2\x69", "\xe3\x69", "\xe8\x69", + /* 0x00f0 */ "\xba\x00", "\xe4\x6e", "\xe1\x6f", "\xe2\x6f", "\xe3\x6f", + /* 0x00f5 */ "\xe4\x6f", "\xe8\x6f", "\x00\x00", "\xb2\x00", "\xe1\x75", + /* 0x00fa */ "\xe2\x75", "\xe3\x75", "\xe8\x75", "\xe2\x79", "\xb4\x00", + /* 0x00ff */ "\xe8\x79", "\xe5\x41", "\xe5\x61", "\xe6\x41", "\xe6\x61", + /* 0x0104 */ "\xe1\x41", "\xe1\x61", "\xe2\x43", "\xe2\x63", "\xe3\x43", + /* 0x0109 */ "\xe3\x63", "\xe7\x43", "\xe7\x63", "\xe9\x43", "\xe9\x63", + /* 0x010e */ "\xe9\x44", "\xe9\x64", "\xa3\x00", "\xb3\x00", "\xe5\x45", + /* 0x0113 */ "\xe5\x65", "\xe6\x65", "\xe6\x65", "\xe7\x45", "\xe7\x65", + /* 0x0118 */ "\xf1\x45", "\xf1\x65", "\xe9\x45", "\xe9\x65", "\xe3\x47", + /* 0x011d */ "\xe3\x67", "\xe6\x47", "\xe6\x67", "\xe7\x47", "\xe7\x67", + /* 0x0122 */ "\xf0\x47", "\xf0\x67", "\xe3\x48", "\x00\x00", "\x00\x00", + /* 0x0127 */ "\xe5\x68", "\xe4\x49", "\xe4\x69", "\xe5\x49", "\xe5\x69", + /* 0x012c */ "\xe6\x49", "\xe6\x69", "\xf1\x49", "\xf1\x69", "\xe7\x49", + /* 0x0131 */ "\xb8\x00", "\x00\x00", "\x00\x00", "\xe3\x4a", "\xe3\x6a", + /* 0x0136 */ "\xf0\x4b", "\xf0\x6b", "\x00\x00", "\xe2\x4c", "\xe2\x6c", + /* 0x013b */ "\xf0\x4c", "\xf0\x6c", "\xe9\x4c", "\xe9\x6c", "\xe7\x4c", + /* 0x0140 */ "\xe7\x6c", "\xa1\x00", "\xb1\x00", "\xe2\x4e", "\xe2\x6e", + /* 0x0145 */ "\xf0\x4e", "\xf0\x6e", "\xe9\x4e", "\xe9\x6e", "\x00\x00", + /* 0x014a */ "\x00\x00", "\x00\x00", "\xe5\x4f", "\xe5\x6f", "\xe6\x4f", + /* 0x014f */ "\xe6\x6f", "\xee\x4f", "\xee\x6f", "\xa6\x00", "\xb6\x00", + /* 0x0154 */ "\xe2\x52", "\xe2\x72", "\xf0\x52", "\xf0\x72", "\xe9\x52", + /* 0x0159 */ "\xe9\x72", "\xe2\x53", "\xe2\x73", "\xe3\x53", "\xe3\x73", + /* 0x015e */ "\xf0\x53", "\xf0\x73", "\xe9\x53", "\xe9\x73", "\xf0\x54", + /* 0x0163 */ "\xf0\x74", "\xe9\x54", "\xe9\x74", "\x00\x00", "\x00\x00", + /* 0x0168 */ "\xe4\x55", "\xe4\x75", "\xe5\x55", "\xe5\x75", "\xe6\x55", + /* 0x016d */ "\xe6\x75", "\xea\x55", "\xea\x75", "\xee\x55", "\xee\x75", + /* 0x0172 */ "\xf1\x55", "\xf1\x75", "\xe3\x57", "\xe3\x77", "\xe3\x59", + /* 0x0177 */ "\xe3\x79", "\xe8\x59", "\xe2\x5a", "\xe2\x7a", "\xe7\x5a", + /* 0x017c */ "\xe7\x7a", "\xe9\x5a", "\xe9\x7a" +/* + This table does not cover the following positions: + + 0x01a0 "\xac\x00", "\xbc\x00" + ... + 0x01af "\xad\x00", "\xbd\x00" + ... + 0x0226 "\xe7\x41", "\xe7\x61" + ... + 0x022e "\xe7\x4f", "\xe7\x6f" + ... + 0x02ba "\xb7\x00" + ... + 0x02be "\xae\x00", "\xb0\x00" + ... + 0x02c7 "\xe9\x20", + ... + 0x02d8 "\xe6\x20", "\xe7\x20", "\xea\x20", "\xf1\x20", "\xe4\x20", + 0x02dd "\xee\x20", + ... + 0x200C "\x8e\x00", "\x8d\x00" + ... + 0x2113 "\xc1\x00" + ... + 0x2117 "\xc2\x00" + ... + 0x266d "\xa9\x00", "\x00\x00", "\xc4\x00" + ... + 0xfe20 "\xeb\x00", "\xec\x00", "\xfa\x00", "\xfb\x00" + + These would blow up the table and are therefore handled specially in + the code. +*/ +}; + + +/* Definitions used in the body of the `gconv' function. */ +#define CHARSET_NAME "ANSI_Z39.47//" +#define FROM_LOOP from_ansi_z39_47 +#define TO_LOOP to_ansi_z39_47 +#define DEFINE_INIT 1 +#define DEFINE_FINI 1 +#define MIN_NEEDED_FROM 1 +#define MAX_NEEDED_FROM 2 +#define MIN_NEEDED_TO 4 + +/* First define the conversion function from ANSI_Z39.47 to UCS4. */ +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MAX_NEEDED_INPUT MAX_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + uint32_t ch = *inptr; \ + int incr; \ + \ + if (__builtin_expect (ch >= 0xe0, 0) && ch <= 0xfe) \ + { \ + /* Composed character. First test whether the next character \ + is also available. */ \ + uint32_t ch2; \ + \ + if (inptr + 1 >= inend) \ + { \ + /* The second character is not available. */ \ + result = __GCONV_INCOMPLETE_INPUT; \ + break; \ + } \ + \ + ch2 = inptr[1]; \ + \ + if (__builtin_expect (ch2 < 0x20, 0) \ + || __builtin_expect (ch2 >= 0x80, 0)) \ + { \ + /* This is illegal. */ \ + if (! ignore_errors_p ()) \ + { \ + result = __GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + ++*irreversible; \ + incr = 1; \ + } \ + else \ + { \ + uint32_t ch3 = to_ucs4_comb[ch - 0xe0][ch2 - 0x20]; \ + if (ch3 != 0) { \ + ch = ch3; \ + incr = 2; \ + } \ + else { \ + ch2 = to_ucs4[ch2]; \ + put32 (outptr, ch2); \ + outptr += 4; \ + ch = to_ucs4[ch]; \ + incr = 2; \ + } \ + } \ + } \ + else \ + { \ + ch = to_ucs4[ch]; \ + incr = 1; \ + } \ + \ + if (__builtin_expect (ch, 1) == 0 && *inptr != '\0') \ + { \ + /* This is an illegal character. */ \ + if (! ignore_errors_p ()) \ + { \ + result = __GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + } \ + else \ + { \ + put32 (outptr, ch); \ + outptr += 4; \ + } \ + \ + inptr += incr; \ + } +#define LOOP_NEED_FLAGS +#include "loop.c" + + +/* Next, define the other direction. */ +#define MIN_NEEDED_INPUT MIN_NEEDED_TO +#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM +#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM +#define LOOPFCT TO_LOOP +#define BODY \ + { \ + char tmp[2]; \ + uint32_t ch = get32 (inptr); \ + const char *cp; \ + \ + if (__builtin_expect (ch >= sizeof (from_ucs4) / sizeof (from_ucs4[0]), \ + 0)) \ + { \ + if (ch >= 0x1a0 && ch <= 0x1a1) \ + { \ + static const char map[2] = "\xac\xbc"; \ + tmp[0] = map[ch - 0x1a0]; \ + tmp[1] = '\0'; \ + cp = tmp; \ + } \ + else if (ch >= 0x1af && ch <= 0x1b0) \ + { \ + static const char map[2] = "\xad\xbd"; \ + tmp[0] = map[ch - 0x1af]; \ + tmp[1] = '\0'; \ + cp = tmp; \ + } \ + else if (ch >= 0x226 && ch <= 0x227) \ + { \ + static const char map[2] = "\x41\x61"; \ + tmp[0] = '\xe7'; \ + tmp[1] = map[ch - 0x226]; \ + cp = tmp; \ + } \ + else if (ch >= 0x22e && ch <= 0x22f) \ + { \ + static const char map[2] = "\x4f\x6f"; \ + tmp[0] = '\xe7'; \ + tmp[1] = map[ch - 0x22e]; \ + cp = tmp; \ + } \ + else if (ch = 0x2ba) \ + cp = "\xb7"; \ + else if (ch >= 0x2be && ch <= 0x2bf) \ + { \ + static const char map[2] = "\xae\xb0"; \ + tmp[0] = map[ch - 0x2be]; \ + tmp[1] = '\0'; \ + cp = tmp; \ + } \ + else if (ch = 0x2c7) \ + cp = "\xe9 "; \ + else if (ch >= 0x2d8 && ch <= 0x2dd && ch != 0x2dc) \ + { \ + static const char map[6] = "\xe6\xe7\xea\xf1\xe4\xee"; \ + \ + tmp[0] = map[ch - 0x2d8]; \ + tmp[1] = ' '; \ + cp = tmp; \ + } \ + else if (ch = 0x200c) \ + cp = "\x8e"; \ + else if (ch = 0x200d) \ + cp = "\x8d"; \ + else if (ch = 0x2113) \ + cp = "\xc1"; \ + else if (ch = 0x2117) \ + cp = "\xc2"; \ + else if (ch = 0x266d) \ + cp = "\xa9"; \ + else if (ch = 0x266f) \ + cp = "\xc4"; \ + else if (ch >= 0xfe20 && ch <= 0xfe23) \ + { \ + static const char map[4] = "\xeb\xec\xfa\xfb"; \ + \ + tmp[0] = map[ch - 0xfe20]; \ + tmp[1] = '\0'; \ + cp = tmp; \ + } \ + else \ + { \ + UNICODE_TAG_HANDLER (ch, 4); \ + \ + /* Illegal characters. */ \ + STANDARD_ERR_HANDLER (4); \ + } \ + } \ + else \ + { \ + cp = from_ucs4[ch]; \ + \ + if (__builtin_expect (cp[0], '\1') == '\0' && ch != 0) \ + { \ + /* Illegal characters. */ \ + STANDARD_ERR_HANDLER (4); \ + } \ + } \ + \ + *outptr++ = cp[0]; \ + /* Now test for a possible second byte and write this if possible. */ \ + if (cp[1] != '\0') \ + { \ + if (__builtin_expect (outptr >= outend, 0)) \ + { \ + /* The result does not fit into the buffer. */ \ + --outptr; \ + result = __GCONV_FULL_OUTPUT; \ + break; \ + } \ + \ + *outptr++ = cp[1]; \ + } \ + \ + inptr += 4; \ + } +#define LOOP_NEED_FLAGS +#include "loop.c" + + +/* Now define the toplevel functions. */ +#include "skeleton.c" diff --git a/ansel/Makefile b/ansel/Makefile new file mode 100644 index 0000000..1110388 --- /dev/null +++ b/ansel/Makefile @@ -0,0 +1,16 @@ +LIBTOOL=libtool +MODPATH=/usr/local/lib +MODULES=ANSI_Z39.47.so + +all: $(MODULES) + +%.so: %.lo + $(LIBTOOL) $(CC) -module -avoid-version -o $*.la $^ -rpath $(MODPATH) + mv .libs/$@ $@ + +%.lo: %.c + $(LIBTOOL) $(CC) -c $^ + +clean: + rm -rf .libs + rm -f *.o *.lo *.la *.so diff --git a/ansel/gconv-modules b/ansel/gconv-modules new file mode 100644 index 0000000..5329bb2 --- /dev/null +++ b/ansel/gconv-modules @@ -0,0 +1,19 @@ +# All lines contain the following information: + +# If the lines start with `module' +# fromset: either a name triple or a regular expression triple. +# toset: a name triple or an expression with \N to get regular +# expression matching results. +# filename: filename of the module implementing the transformation. +# If it is not absolute the path is made absolute by prepending +# the directory the configuration file is found in. +# cost: optional cost of the transformation. Default is 1. + +# If the lines start with `alias' +# alias: alias name which is not really recognized. +# name: the real name of the character set + +# from to module cost +alias ANSEL// ANSI_Z39.47// +module ANSI_Z39.47// INTERNAL ANSI_Z39.47 1 +module INTERNAL ANSI_Z39.47// ANSI_Z39.47 1 diff --git a/ansel/loop.c b/ansel/loop.c new file mode 100644 index 0000000..ae83894 --- /dev/null +++ b/ansel/loop.c @@ -0,0 +1,451 @@ +/* Conversion loop frame work. + Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper , 1998. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* This file provides a frame for the reader loop in all conversion modules. + The actual code must (of course) be provided in the actual module source + code but certain actions can be written down generically, with some + customization options which are these: + + MIN_NEEDED_INPUT minimal number of input bytes needed for the next + conversion. + MIN_NEEDED_OUTPUT minimal number of bytes produced by the next round + of conversion. + + MAX_NEEDED_INPUT you guess it, this is the maximal number of input + bytes needed. It defaults to MIN_NEEDED_INPUT + MAX_NEEDED_OUTPUT likewise for output bytes. + + LOOPFCT name of the function created. If not specified + the name is `loop' but this prevents the use + of multiple functions in the same file. + + BODY this is supposed to expand to the body of the loop. + The user must provide this. + + EXTRA_LOOP_DECLS extra arguments passed from converion loop call. + + INIT_PARAMS code to define and initialize variables from params. + UPDATE_PARAMS code to store result in params. +*/ + +#include +#include +#include +#include +#include +#include +#include /* For MIN. */ +#define __need_size_t +#include + + +/* We have to provide support for machines which are not able to handled + unaligned memory accesses. Some of the character encodings have + representations with a fixed width of 2 or 4 bytes. But if we cannot + access unaligned memory we still have to read byte-wise. */ +#undef FCTNAME2 +#if defined _STRING_ARCH_unaligned || !defined DEFINE_UNALIGNED +/* We can handle unaligned memory access. */ +# define get16(addr) *((__const uint16_t *) (addr)) +# define get32(addr) *((__const uint32_t *) (addr)) + +/* We need no special support for writing values either. */ +# define put16(addr, val) *((uint16_t *) (addr)) = (val) +# define put32(addr, val) *((uint32_t *) (addr)) = (val) + +# define FCTNAME2(name) name +#else +/* Distinguish between big endian and little endian. */ +# if __BYTE_ORDER == __LITTLE_ENDIAN +# define get16(addr) \ + (((__const unsigned char *) (addr))[1] << 8 \ + | ((__const unsigned char *) (addr))[0]) +# define get32(addr) \ + (((((__const unsigned char *) (addr))[3] << 8 \ + | ((__const unsigned char *) (addr))[2]) << 8 \ + | ((__const unsigned char *) (addr))[1]) << 8 \ + | ((__const unsigned char *) (addr))[0]) + +# define put16(addr, val) \ + ({ uint16_t __val = (val); \ + ((unsigned char *) (addr))[0] = __val; \ + ((unsigned char *) (addr))[1] = __val >> 8; \ + (void) 0; }) +# define put32(addr, val) \ + ({ uint32_t __val = (val); \ + ((unsigned char *) (addr))[0] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[1] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[2] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[3] = __val; \ + (void) 0; }) +# else +# define get16(addr) \ + (((__const unsigned char *) (addr))[0] << 8 \ + | ((__const unsigned char *) (addr))[1]) +# define get32(addr) \ + (((((__const unsigned char *) (addr))[0] << 8 \ + | ((__const unsigned char *) (addr))[1]) << 8 \ + | ((__const unsigned char *) (addr))[2]) << 8 \ + | ((__const unsigned char *) (addr))[3]) + +# define put16(addr, val) \ + ({ uint16_t __val = (val); \ + ((unsigned char *) (addr))[1] = __val; \ + ((unsigned char *) (addr))[0] = __val >> 8; \ + (void) 0; }) +# define put32(addr, val) \ + ({ uint32_t __val = (val); \ + ((unsigned char *) (addr))[3] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[2] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[1] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[0] = __val; \ + (void) 0; }) +# endif + +# define FCTNAME2(name) name##_unaligned +#endif +#define FCTNAME(name) FCTNAME2(name) + + +/* We need at least one byte for the next round. */ +#ifndef MIN_NEEDED_INPUT +# error "MIN_NEEDED_INPUT definition missing" +#endif + +/* Let's see how many bytes we produce. */ +#ifndef MAX_NEEDED_INPUT +# define MAX_NEEDED_INPUT MIN_NEEDED_INPUT +#endif + +/* We produce at least one byte in the next round. */ +#ifndef MIN_NEEDED_OUTPUT +# error "MIN_NEEDED_OUTPUT definition missing" +#endif + +/* Let's see how many bytes we produce. */ +#ifndef MAX_NEEDED_OUTPUT +# define MAX_NEEDED_OUTPUT MIN_NEEDED_OUTPUT +#endif + +/* Default name for the function. */ +#ifndef LOOPFCT +# define LOOPFCT loop +#endif + +/* Make sure we have a loop body. */ +#ifndef BODY +# error "Definition of BODY missing for function" LOOPFCT +#endif + + +/* If no arguments have to passed to the loop function define the macro + as empty. */ +#ifndef EXTRA_LOOP_DECLS +# define EXTRA_LOOP_DECLS +#endif + + +/* To make it easier for the writers of the modules, we define a macro + to test whether we have to ignore errors. */ +#define ignore_errors_p() \ + (irreversible != NULL && (flags & __GCONV_IGNORE_ERRORS)) + + +/* Error handling with transliteration/transcription function use and + ignoring of errors. Note that we cannot use the do while (0) trick + since `break' and `continue' must reach certain points. */ +#define STANDARD_ERR_HANDLER(Incr) \ + { \ + struct __gconv_trans_data *trans; \ + \ + result = __GCONV_ILLEGAL_INPUT; \ + \ + if (irreversible == NULL) \ + /* This means we are in call from __gconv_transliterate. In this \ + case we are not doing any error recovery outself. */ \ + break; \ + \ + /* First try the transliteration methods. */ \ + for (trans = step_data->__trans; trans != NULL; trans = trans->__next) \ + { \ + result = DL_CALL_FCT (trans->__trans_fct, \ + (step, step_data, trans->__data, *inptrp, \ + &inptr, inend, &outptr, irreversible)); \ + if (result != __GCONV_ILLEGAL_INPUT) \ + break; \ + } \ + /* If any of them recognized the input continue with the loop. */ \ + if (result != __GCONV_ILLEGAL_INPUT) \ + continue; \ + \ + /* Next see whether we have to ignore the error. If not, stop. */ \ + if (! ignore_errors_p ()) \ + break; \ + \ + /* When we come here it means we ignore the character. */ \ + ++*irreversible; \ + inptr += Incr; \ + continue; \ + } + + +/* Handling of Unicode 3.1 TAG characters. Unicode recommends + "If language codes are not relevant to the particular processing + operation, then they should be ignored." + This macro is usually called right before STANDARD_ERR_HANDLER (Incr). */ +#define UNICODE_TAG_HANDLER(Character, Incr) \ + { \ + /* TAG characters are those in the range U+E0000..U+E007F. */ \ + if (((Character) >> 7) == (0xe0000 >> 7)) \ + { \ + inptr += Incr; \ + continue; \ + } \ + } + + +/* The function returns the status, as defined in gconv.h. */ +static inline int +FCTNAME (LOOPFCT) (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, const unsigned char *inend, + unsigned char **outptrp, const unsigned char *outend, + size_t *irreversible EXTRA_LOOP_DECLS) +{ +#ifdef LOOP_NEED_STATE + mbstate_t *state = step_data->__statep; +#endif +#ifdef LOOP_NEED_FLAGS + int flags = step_data->__flags; +#endif +#ifdef LOOP_NEED_DATA + void *data = step->__data; +#endif + int result = __GCONV_EMPTY_INPUT; + const unsigned char *inptr = *inptrp; + unsigned char *outptr = *outptrp; + +#ifdef INIT_PARAMS + INIT_PARAMS; +#endif + + while (inptr != inend) + { + /* `if' cases for MIN_NEEDED_OUTPUT ==/!= 1 is made to help the + compiler generating better code. They will be optimized away + since MIN_NEEDED_OUTPUT is always a constant. */ + if ((MIN_NEEDED_OUTPUT != 1 + && __builtin_expect (outptr + MIN_NEEDED_OUTPUT > outend, 0)) + || (MIN_NEEDED_OUTPUT == 1 + && __builtin_expect (outptr >= outend, 0))) + { + /* Overflow in the output buffer. */ + result = __GCONV_FULL_OUTPUT; + break; + } + if (MIN_NEEDED_INPUT > 1 + && __builtin_expect (inptr + MIN_NEEDED_INPUT > inend, 0)) + { + /* We don't have enough input for another complete input + character. */ + result = __GCONV_INCOMPLETE_INPUT; + break; + } + + /* Here comes the body the user provides. It can stop with + RESULT set to GCONV_INCOMPLETE_INPUT (if the size of the + input characters vary in size), GCONV_ILLEGAL_INPUT, or + GCONV_FULL_OUTPUT (if the output characters vary in size). */ + BODY + } + + /* Update the pointers pointed to by the parameters. */ + *inptrp = inptr; + *outptrp = outptr; +#ifdef UPDATE_PARAMS + UPDATE_PARAMS; +#endif + + return result; +} + + +/* Include the file a second time to define the function to handle + unaligned access. */ +#if !defined DEFINE_UNALIGNED && !defined _STRING_ARCH_unaligned \ + && MIN_NEEDED_FROM != 1 && MAX_NEEDED_FROM % MIN_NEEDED_FROM == 0 \ + && MIN_NEEDED_TO != 1 && MAX_NEEDED_TO % MIN_NEEDED_TO == 0 +# undef get16 +# undef get32 +# undef put16 +# undef put32 +# undef unaligned + +# define DEFINE_UNALIGNED +# include "loop.c" +# undef DEFINE_UNALIGNED +#endif + + +#if MAX_NEEDED_INPUT > 1 +# define SINGLE(fct) SINGLE2 (fct) +# define SINGLE2(fct) fct##_single +static inline int +SINGLE(LOOPFCT) (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, const unsigned char *inend, + unsigned char **outptrp, unsigned char *outend, + size_t *irreversible EXTRA_LOOP_DECLS) +{ + mbstate_t *state = step_data->__statep; +#ifdef LOOP_NEED_FLAGS + int flags = step_data->__flags; +#endif +#ifdef LOOP_NEED_DATA + void *data = step->__data; +#endif + int result = __GCONV_OK; + unsigned char bytebuf[MAX_NEEDED_INPUT]; + const unsigned char *inptr = *inptrp; + unsigned char *outptr = *outptrp; + size_t inlen; + +#ifdef INIT_PARAMS + INIT_PARAMS; +#endif + +#ifdef UNPACK_BYTES + UNPACK_BYTES +#else + /* Add the bytes from the state to the input buffer. */ + for (inlen = 0; inlen < (size_t) (state->__count & 7); ++inlen) + bytebuf[inlen] = state->__value.__wchb[inlen]; +#endif + + /* Are there enough bytes in the input buffer? */ + if (__builtin_expect (inptr + (MIN_NEEDED_INPUT - inlen) > inend, 0)) + { + *inptrp = inend; +#ifdef STORE_REST + inptr = bytebuf; + inptrp = &inptr; + inend = &bytebuf[inlen]; + + STORE_REST +#else + /* We don't have enough input for another complete input + character. */ + while (inptr < inend) + state->__value.__wchb[inlen++] = *inptr++; +#endif + + return __GCONV_INCOMPLETE_INPUT; + } + + /* Enough space in output buffer. */ + if ((MIN_NEEDED_OUTPUT != 1 && outptr + MIN_NEEDED_OUTPUT > outend) + || (MIN_NEEDED_OUTPUT == 1 && outptr >= outend)) + /* Overflow in the output buffer. */ + return __GCONV_FULL_OUTPUT; + + /* Now add characters from the normal input buffer. */ + do + bytebuf[inlen++] = *inptr++; + while (inlen < MAX_NEEDED_INPUT && inptr < inend); + + inptr = bytebuf; + inend = &bytebuf[inlen]; + + do + { + BODY + } + while (0); + + /* Now we either have produced an output character and consumed all the + bytes from the state and at least one more, or the character is still + incomplete, or we have some other error (like illegal input character, + no space in output buffer). */ + if (__builtin_expect (inptr != bytebuf, 1)) + { + /* We found a new character. */ + assert (inptr - bytebuf > (state->__count & 7)); + + *inptrp += inptr - bytebuf - (state->__count & 7); + *outptrp = outptr; + + result = __GCONV_OK; + + /* Clear the state buffer. */ + state->__count &= ~7; + } + else if (result == __GCONV_INCOMPLETE_INPUT) + { + /* This can only happen if we have less than MAX_NEEDED_INPUT bytes + available. */ + assert (inend != &bytebuf[MAX_NEEDED_INPUT]); + + *inptrp += inend - bytebuf - (state->__count & 7); +#ifdef STORE_REST + inptrp = &inptr; + + STORE_REST +#else + /* We don't have enough input for another complete input + character. */ + while (inptr < inend) + state->__value.__wchb[inlen++] = *inptr++; +#endif + } + + return result; +} +# undef SINGLE +# undef SINGLE2 +#endif + + +/* We remove the macro definitions so that we can include this file again + for the definition of another function. */ +#undef MIN_NEEDED_INPUT +#undef MAX_NEEDED_INPUT +#undef MIN_NEEDED_OUTPUT +#undef MAX_NEEDED_OUTPUT +#undef LOOPFCT +#undef BODY +#undef LOOPFCT +#undef EXTRA_LOOP_DECLS +#undef INIT_PARAMS +#undef UPDATE_PARAMS +#undef UNPACK_BYTES +#undef LOOP_NEED_STATE +#undef LOOP_NEED_FLAGS +#undef LOOP_NEED_DATA +#undef get16 +#undef get32 +#undef put16 +#undef put32 +#undef unaligned diff --git a/ansel/skeleton.c b/ansel/skeleton.c new file mode 100644 index 0000000..a42c028 --- /dev/null +++ b/ansel/skeleton.c @@ -0,0 +1,696 @@ +/* Skeleton for a conversion module. + Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper , 1998. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* This file can be included to provide definitions of several things + many modules have in common. It can be customized using the following + macros: + + DEFINE_INIT define the default initializer. This requires the + following symbol to be defined. + + CHARSET_NAME string with official name of the coded character + set (in all-caps) + + DEFINE_FINI define the default destructor function. + + MIN_NEEDED_FROM minimal number of bytes needed for the from-charset. + MIN_NEEDED_TO likewise for the to-charset. + + MAX_NEEDED_FROM maximal number of bytes needed for the from-charset. + This macro is optional, it defaults to MIN_NEEDED_FROM. + MAX_NEEDED_TO likewise for the to-charset. + + DEFINE_DIRECTION_OBJECTS + two objects will be defined to be used when the + `gconv' function must only distinguish two + directions. This is implied by DEFINE_INIT. + If this macro is not defined the following + macro must be available. + + FROM_DIRECTION this macro is supposed to return a value != 0 + if we convert from the current character set, + otherwise it return 0. + + EMIT_SHIFT_TO_INIT this symbol is optional. If it is defined it + defines some code which writes out a sequence + of characters which bring the current state into + the initial state. + + FROM_LOOP name of the function implementing the conversion + from the current characters. + TO_LOOP likewise for the other direction + + ONE_DIRECTION optional. If defined to 1, only one conversion + direction is defined instead of two. In this + case, FROM_DIRECTION should be defined to 1, and + FROM_LOOP and TO_LOOP should have the same value. + + SAVE_RESET_STATE in case of an error we must reset the state for + the rerun so this macro must be defined for + stateful encodings. It takes an argument which + is nonzero when saving. + + RESET_INPUT_BUFFER If the input character sets allow this the macro + can be defined to reset the input buffer pointers + to cover only those characters up to the error. + + FUNCTION_NAME if not set the conversion function is named `gconv'. + + PREPARE_LOOP optional code preparing the conversion loop. Can + contain variable definitions. + END_LOOP also optional, may be used to store information + + EXTRA_LOOP_ARGS optional macro specifying extra arguments passed + to loop function. + */ + +#include +#include +#include +#define __need_size_t +#define __need_NULL +#include + +#ifndef STATIC_GCONV +# include +#endif + +#ifndef DL_CALL_FCT +# define DL_CALL_FCT(fct, args) fct args +#endif + +/* The direction objects. */ +#if DEFINE_DIRECTION_OBJECTS || DEFINE_INIT +static int from_object; +static int to_object; + +# ifndef FROM_DIRECTION +# define FROM_DIRECTION (step->__data == &from_object) +# endif +#else +# ifndef FROM_DIRECTION +# error "FROM_DIRECTION must be provided if direction objects are not used" +# endif +#endif + + +/* How many bytes are needed at most for the from-charset. */ +#ifndef MAX_NEEDED_FROM +# define MAX_NEEDED_FROM MIN_NEEDED_FROM +#endif + +/* Same for the to-charset. */ +#ifndef MAX_NEEDED_TO +# define MAX_NEEDED_TO MIN_NEEDED_TO +#endif + + +/* Define macros which can access unaligned buffers. These macros are + supposed to be used only in code outside the inner loops. For the inner + loops we have other definitions which allow optimized access. */ +#ifdef _STRING_ARCH_unaligned +/* We can handle unaligned memory access. */ +# define get16u(addr) *((__const uint16_t *) (addr)) +# define get32u(addr) *((__const uint32_t *) (addr)) + +/* We need no special support for writing values either. */ +# define put16u(addr, val) *((uint16_t *) (addr)) = (val) +# define put32u(addr, val) *((uint32_t *) (addr)) = (val) +#else +/* Distinguish between big endian and little endian. */ +# if __BYTE_ORDER == __LITTLE_ENDIAN +# define get16u(addr) \ + (((__const unsigned char *) (addr))[1] << 8 \ + | ((__const unsigned char *) (addr))[0]) +# define get32u(addr) \ + (((((__const unsigned char *) (addr))[3] << 8 \ + | ((__const unsigned char *) (addr))[2]) << 8 \ + | ((__const unsigned char *) (addr))[1]) << 8 \ + | ((__const unsigned char *) (addr))[0]) + +# define put16u(addr, val) \ + ({ uint16_t __val = (val); \ + ((unsigned char *) (addr))[0] = __val; \ + ((unsigned char *) (addr))[1] = __val >> 8; \ + (void) 0; }) +# define put32u(addr, val) \ + ({ uint32_t __val = (val); \ + ((unsigned char *) (addr))[0] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[1] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[2] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[3] = __val; \ + (void) 0; }) +# else +# define get16u(addr) \ + (((__const unsigned char *) (addr))[0] << 8 \ + | ((__const unsigned char *) (addr))[1]) +# define get32u(addr) \ + (((((__const unsigned char *) (addr))[0] << 8 \ + | ((__const unsigned char *) (addr))[1]) << 8 \ + | ((__const unsigned char *) (addr))[2]) << 8 \ + | ((__const unsigned char *) (addr))[3]) + +# define put16u(addr, val) \ + ({ uint16_t __val = (val); \ + ((unsigned char *) (addr))[1] = __val; \ + ((unsigned char *) (addr))[0] = __val >> 8; \ + (void) 0; }) +# define put32u(addr, val) \ + ({ uint32_t __val = (val); \ + ((unsigned char *) (addr))[3] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[2] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[1] = __val; \ + __val >>= 8; \ + ((unsigned char *) (addr))[0] = __val; \ + (void) 0; }) +# endif +#endif + + +/* For conversions from a fixed width character set to another fixed width + character set we can define RESET_INPUT_BUFFER in a very fast way. */ +#if !defined RESET_INPUT_BUFFER && !defined SAVE_RESET_STATE +# if MIN_NEEDED_FROM == MAX_NEEDED_FROM && MIN_NEEDED_TO == MAX_NEEDED_TO +/* We have to use these `if's here since the compiler cannot know that + (outbuf - outerr) is always divisible by MIN_NEEDED_TO. */ +# define RESET_INPUT_BUFFER \ + if (MIN_NEEDED_FROM % MIN_NEEDED_TO == 0) \ + *inptrp -= (outbuf - outerr) * (MIN_NEEDED_FROM / MIN_NEEDED_TO); \ + else if (MIN_NEEDED_TO % MIN_NEEDED_FROM == 0) \ + *inptrp -= (outbuf - outerr) / (MIN_NEEDED_TO / MIN_NEEDED_FROM); \ + else \ + *inptrp -= ((outbuf - outerr) / MIN_NEEDED_TO) * MIN_NEEDED_FROM +# endif +#endif + + +/* The default init function. It simply matches the name and initializes + the step data to point to one of the objects above. */ +#if DEFINE_INIT +# ifndef CHARSET_NAME +# error "CHARSET_NAME not defined" +# endif + +extern int gconv_init (struct __gconv_step *step); +int +gconv_init (struct __gconv_step *step) +{ + /* Determine which direction. */ + if (strcmp (step->__from_name, CHARSET_NAME) == 0) + { + step->__data = &from_object; + + step->__min_needed_from = MIN_NEEDED_FROM; + step->__max_needed_from = MAX_NEEDED_FROM; + step->__min_needed_to = MIN_NEEDED_TO; + step->__max_needed_to = MAX_NEEDED_TO; + } + else if (__builtin_expect (strcmp (step->__to_name, CHARSET_NAME), 0) == 0) + { + step->__data = &to_object; + + step->__min_needed_from = MIN_NEEDED_TO; + step->__max_needed_from = MAX_NEEDED_TO; + step->__min_needed_to = MIN_NEEDED_FROM; + step->__max_needed_to = MAX_NEEDED_FROM; + } + else + return __GCONV_NOCONV; + +#ifdef SAVE_RESET_STATE + step->__stateful = 1; +#else + step->__stateful = 0; +#endif + + return __GCONV_OK; +} +#endif + + +/* The default destructor function does nothing in the moment and so + we don't define it at all. But we still provide the macro just in + case we need it some day. */ +#if DEFINE_FINI +#endif + + +/* If no arguments have to passed to the loop function define the macro + as empty. */ +#ifndef EXTRA_LOOP_ARGS +# define EXTRA_LOOP_ARGS +#endif + + +/* This is the actual conversion function. */ +#ifndef FUNCTION_NAME +# define FUNCTION_NAME gconv +#endif + +/* The macros are used to access the function to convert single characters. */ +#define SINGLE(fct) SINGLE2 (fct) +#define SINGLE2(fct) fct##_single + + +extern int FUNCTION_NAME (struct __gconv_step *step, + struct __gconv_step_data *data, + const unsigned char **inptrp, + const unsigned char *inend, + unsigned char **outbufstart, size_t *irreversible, + int do_flush, int consume_incomplete); +int +FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data, + const unsigned char **inptrp, const unsigned char *inend, + unsigned char **outbufstart, size_t *irreversible, int do_flush, + int consume_incomplete) +{ + struct __gconv_step *next_step = step + 1; + struct __gconv_step_data *next_data = data + 1; + __gconv_fct fct; + int status; + + fct = (data->__flags & __GCONV_IS_LAST) ? NULL : next_step->__fct; + + /* If the function is called with no input this means we have to reset + to the initial state. The possibly partly converted input is + dropped. */ + if (__builtin_expect (do_flush, 0)) + { + /* This should never happen during error handling. */ + assert (outbufstart == NULL); + + status = __GCONV_OK; + +#ifdef EMIT_SHIFT_TO_INIT + if (do_flush == 1) + { + /* We preserve the initial values of the pointer variables. */ + unsigned char *outbuf = data->__outbuf; + unsigned char *outstart = outbuf; + unsigned char *outend = data->__outbufend; + +# ifdef PREPARE_LOOP + PREPARE_LOOP +# endif + +# ifdef SAVE_RESET_STATE + SAVE_RESET_STATE (1); +# endif + + /* Emit the escape sequence to reset the state. */ + EMIT_SHIFT_TO_INIT; + + /* Call the steps down the chain if there are any but only if we + successfully emitted the escape sequence. This should only + fail if the output buffer is full. If the input is invalid + it should be discarded since the user wants to start from a + clean state. */ + if (status == __GCONV_OK) + { + if (data->__flags & __GCONV_IS_LAST) + /* Store information about how many bytes are available. */ + data->__outbuf = outbuf; + else + { + /* Write out all output which was produced. */ + if (outbuf > outstart) + { + const unsigned char *outerr = outstart; + int result; + + result = DL_CALL_FCT (fct, (next_step, next_data, + &outerr, outbuf, NULL, + irreversible, 0, + consume_incomplete)); + + if (result != __GCONV_EMPTY_INPUT) + { + if (__builtin_expect (outerr != outbuf, 0)) + { + /* We have a problem. Undo the conversion. */ + outbuf = outstart; + + /* Restore the state. */ +# ifdef SAVE_RESET_STATE + SAVE_RESET_STATE (0); +# endif + } + + /* Change the status. */ + status = result; + } + } + + if (status == __GCONV_OK) + /* Now flush the remaining steps. */ + status = DL_CALL_FCT (fct, (next_step, next_data, NULL, + NULL, NULL, irreversible, 1, + consume_incomplete)); + } + } + } + else +#endif + { + /* Clear the state object. There might be bytes in there from + previous calls with CONSUME_INCOMPLETE == 1. But don't emit + escape sequences. */ + memset (data->__statep, '\0', sizeof (*data->__statep)); + + if (! (data->__flags & __GCONV_IS_LAST)) + /* Now flush the remaining steps. */ + status = DL_CALL_FCT (fct, (next_step, next_data, NULL, NULL, + NULL, irreversible, do_flush, + consume_incomplete)); + } + } + else + { + /* We preserve the initial values of the pointer variables. */ + const unsigned char *inptr = *inptrp; + unsigned char *outbuf = (__builtin_expect (outbufstart == NULL, 1) + ? data->__outbuf : *outbufstart); + unsigned char *outend = data->__outbufend; + unsigned char *outstart; + /* This variable is used to count the number of characters we + actually converted. */ + size_t lirreversible = 0; + size_t *lirreversiblep = irreversible ? &lirreversible : NULL; +#if defined _STRING_ARCH_unaligned \ + || MIN_NEEDED_FROM == 1 || MAX_NEEDED_FROM % MIN_NEEDED_FROM != 0 \ + || MIN_NEEDED_TO == 1 || MAX_NEEDED_TO % MIN_NEEDED_TO != 0 +# define unaligned 0 +#else + int unaligned; +# define GEN_unaligned(name) GEN_unaligned2 (name) +# define GEN_unaligned2(name) name##_unaligned +#endif + +#ifdef PREPARE_LOOP + PREPARE_LOOP +#endif + +#if MAX_NEEDED_FROM > 1 || MAX_NEEDED_TO > 1 + /* If the function is used to implement the mb*towc*() or wc*tomb*() + functions we must test whether any bytes from the last call are + stored in the `state' object. */ + if (((MAX_NEEDED_FROM > 1 && MAX_NEEDED_TO > 1) + || (MAX_NEEDED_FROM > 1 && FROM_DIRECTION) + || (MAX_NEEDED_TO > 1 && !FROM_DIRECTION)) + && consume_incomplete && (data->__statep->__count & 7) != 0) + { + /* Yep, we have some bytes left over. Process them now. + But this must not happen while we are called from an + error handler. */ + assert (outbufstart == NULL); + +# if MAX_NEEDED_FROM > 1 + if (MAX_NEEDED_TO == 1 || FROM_DIRECTION) + status = SINGLE(FROM_LOOP) (step, data, inptrp, inend, &outbuf, + outend, lirreversiblep + EXTRA_LOOP_ARGS); +# endif +# if MAX_NEEDED_FROM > 1 && MAX_NEEDED_TO > 1 && !ONE_DIRECTION + else +# endif +# if MAX_NEEDED_TO > 1 && !ONE_DIRECTION + status = SINGLE(TO_LOOP) (step, data, inptrp, inend, &outbuf, + outend, lirreversiblep EXTRA_LOOP_ARGS); +# endif + + if (__builtin_expect (status, __GCONV_OK) != __GCONV_OK) + return status; + } +#endif + +#if !defined _STRING_ARCH_unaligned \ + && MIN_NEEDED_FROM != 1 && MAX_NEEDED_FROM % MIN_NEEDED_FROM == 0 \ + && MIN_NEEDED_TO != 1 && MAX_NEEDED_TO % MIN_NEEDED_TO == 0 + /* The following assumes that encodings, which have a variable length + what might unalign a buffer even though it is a aligned in the + beginning, either don't have the minimal number of bytes as a divisor + of the maximum length or have a minimum length of 1. This is true + for all known and supported encodings. */ + unaligned = ((FROM_DIRECTION + && ((uintptr_t) inptr % MIN_NEEDED_FROM != 0 + || ((data->__flags & __GCONV_IS_LAST) + && (uintptr_t) outbuf % MIN_NEEDED_TO != 0))) + || (!FROM_DIRECTION + && (((data->__flags & __GCONV_IS_LAST) + && (uintptr_t) outbuf % MIN_NEEDED_FROM != 0) + || (uintptr_t) inptr % MIN_NEEDED_TO != 0))); +#endif + + while (1) + { + struct __gconv_trans_data *trans; + + /* Remember the start value for this round. */ + inptr = *inptrp; + /* The outbuf buffer is empty. */ + outstart = outbuf; + +#ifdef SAVE_RESET_STATE + SAVE_RESET_STATE (1); +#endif + + if (__builtin_expect (!unaligned, 1)) + { + if (FROM_DIRECTION) + /* Run the conversion loop. */ + status = FROM_LOOP (step, data, inptrp, inend, &outbuf, outend, + lirreversiblep EXTRA_LOOP_ARGS); + else + /* Run the conversion loop. */ + status = TO_LOOP (step, data, inptrp, inend, &outbuf, outend, + lirreversiblep EXTRA_LOOP_ARGS); + } +#if !defined _STRING_ARCH_unaligned \ + && MIN_NEEDED_FROM != 1 && MAX_NEEDED_FROM % MIN_NEEDED_FROM == 0 \ + && MIN_NEEDED_TO != 1 && MAX_NEEDED_TO % MIN_NEEDED_TO == 0 + else + { + if (FROM_DIRECTION) + /* Run the conversion loop. */ + status = GEN_unaligned (FROM_LOOP) (step, data, inptrp, inend, + &outbuf, outend, + lirreversiblep + EXTRA_LOOP_ARGS); + else + /* Run the conversion loop. */ + status = GEN_unaligned (TO_LOOP) (step, data, inptrp, inend, + &outbuf, outend, + lirreversiblep + EXTRA_LOOP_ARGS); + } +#endif + + /* If we were called as part of an error handling module we + don't do anything else here. */ + if (__builtin_expect (outbufstart != NULL, 0)) + { + *outbufstart = outbuf; + return status; + } + + /* Give the transliteration module the chance to store the + original text and the result in case it needs a context. */ + for (trans = data->__trans; trans != NULL; trans = trans->__next) + if (trans->__trans_context_fct != NULL) + DL_CALL_FCT (trans->__trans_context_fct, + (trans->__data, inptr, *inptrp, outstart, outbuf)); + + /* We finished one use of the loops. */ + ++data->__invocation_counter; + + /* If this is the last step leave the loop, there is nothing + we can do. */ + if (__builtin_expect (data->__flags & __GCONV_IS_LAST, 0)) + { + /* Store information about how many bytes are available. */ + data->__outbuf = outbuf; + + /* Remember how many non-identical characters we + converted in a irreversible way. */ + *irreversible += lirreversible; + + break; + } + + /* Write out all output which was produced. */ + if (__builtin_expect (outbuf > outstart, 1)) + { + const unsigned char *outerr = data->__outbuf; + int result; + + result = DL_CALL_FCT (fct, (next_step, next_data, &outerr, + outbuf, NULL, irreversible, 0, + consume_incomplete)); + + if (result != __GCONV_EMPTY_INPUT) + { + if (__builtin_expect (outerr != outbuf, 0)) + { +#ifdef RESET_INPUT_BUFFER + RESET_INPUT_BUFFER; +#else + /* We have a problem with the in on of the functions + below. Undo the conversion upto the error point. */ + size_t nstatus; + + /* Reload the pointers. */ + *inptrp = inptr; + outbuf = outstart; + + /* Restore the state. */ +# ifdef SAVE_RESET_STATE + SAVE_RESET_STATE (0); +# endif + + if (__builtin_expect (!unaligned, 1)) + { + if (FROM_DIRECTION) + /* Run the conversion loop. */ + nstatus = FROM_LOOP (step, data, inptrp, inend, + &outbuf, outerr, + lirreversiblep + EXTRA_LOOP_ARGS); + else + /* Run the conversion loop. */ + nstatus = TO_LOOP (step, data, inptrp, inend, + &outbuf, outerr, + lirreversiblep + EXTRA_LOOP_ARGS); + } +# if !defined _STRING_ARCH_unaligned \ + && MIN_NEEDED_FROM != 1 && MAX_NEEDED_FROM % MIN_NEEDED_FROM == 0 \ + && MIN_NEEDED_TO != 1 && MAX_NEEDED_TO % MIN_NEEDED_TO == 0 + else + { + if (FROM_DIRECTION) + /* Run the conversion loop. */ + nstatus = GEN_unaligned (FROM_LOOP) (step, data, + inptrp, inend, + &outbuf, + outerr, + lirreversiblep + EXTRA_LOOP_ARGS); + else + /* Run the conversion loop. */ + nstatus = GEN_unaligned (TO_LOOP) (step, data, + inptrp, inend, + &outbuf, outerr, + lirreversiblep + EXTRA_LOOP_ARGS); + } +# endif + + /* We must run out of output buffer space in this + rerun. */ + assert (outbuf == outerr); + assert (nstatus == __GCONV_FULL_OUTPUT); + + /* If we haven't consumed a single byte decrement + the invocation counter. */ + if (__builtin_expect (outbuf == outstart, 0)) + --data->__invocation_counter; +#endif /* reset input buffer */ + } + + /* Change the status. */ + status = result; + } + else + /* All the output is consumed, we can make another run + if everything was ok. */ + if (status == __GCONV_FULL_OUTPUT) + { + status = __GCONV_OK; + outbuf = data->__outbuf; + } + } + + if (status != __GCONV_OK) + break; + + /* Reset the output buffer pointer for the next round. */ + outbuf = data->__outbuf; + } + +#ifdef END_LOOP + END_LOOP +#endif + + /* If we are supposed to consume all character store now all of the + remaining characters in the `state' object. */ +#if MAX_NEEDED_FROM > 1 || MAX_NEEDED_TO > 1 + if (((MAX_NEEDED_FROM > 1 && MAX_NEEDED_TO > 1) + || (MAX_NEEDED_FROM > 1 && FROM_DIRECTION) + || (MAX_NEEDED_TO > 1 && !FROM_DIRECTION)) + && __builtin_expect (consume_incomplete, 0) + && status == __GCONV_INCOMPLETE_INPUT) + { +# ifdef STORE_REST + mbstate_t *state = data->__statep; + + STORE_REST +# else + size_t cnt; + + /* Make sure the remaining bytes fit into the state objects + buffer. */ + assert (inend - *inptrp < 4); + + for (cnt = 0; *inptrp < inend; ++cnt) + data->__statep->__value.__wchb[cnt] = *(*inptrp)++; + data->__statep->__count &= ~7; + data->__statep->__count |= cnt; +# endif + } +#endif + } + + return status; +} + +#undef DEFINE_INIT +#undef CHARSET_NAME +#undef DEFINE_FINI +#undef MIN_NEEDED_FROM +#undef MIN_NEEDED_TO +#undef MAX_NEEDED_FROM +#undef MAX_NEEDED_TO +#undef DEFINE_DIRECTION_OBJECTS +#undef FROM_DIRECTION +#undef EMIT_SHIFT_TO_INIT +#undef FROM_LOOP +#undef TO_LOOP +#undef SAVE_RESET_STATE +#undef RESET_INPUT_BUFFER +#undef FUNCTION_NAME +#undef PREPARE_LOOP +#undef END_LOOP +#undef ONE_DIRECTION +#undef STORE_REST