2 * Copyright (C) 1999-2002 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
15 * You should have received a copy of the GNU Library General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, write to the Free Software Foundation, Inc., 59 Temple Place -
18 * Suite 330, Boston, MA 02111-1307, USA.
25 #include "ANSI_Z39.47-tables.h"
27 /* Upper half of the byte-to-Unicode table: 128 entries that the decoder
   indexes with (byte - 0x80).  The first half of the table is omitted;
   an identity mapping (ASCII) is assumed for it. */
28 static const unsigned short ansi_z39_47_2uni[128] = TABLE_TO_UCS4_BASIC;
30 /* The outer array range runs from 0xe0 to 0xfe, the inner range from 0x20 to 0x7f. */
32 static const unsigned short ansi_z39_47_2uni_comb[31][96] =
34 /* 0xe0 (hook above) */ TABLE_TO_UCS4_COMBINING_E0,
35 /* 0xe1 (grave) */ TABLE_TO_UCS4_COMBINING_E1,
36 /* 0xe2 (acute) */ TABLE_TO_UCS4_COMBINING_E2,
37 /* 0xe3 (circumflex) */ TABLE_TO_UCS4_COMBINING_E3,
38 /* 0xe4 (tilde) */ TABLE_TO_UCS4_COMBINING_E4,
39 /* 0xe5 (macron) */ TABLE_TO_UCS4_COMBINING_E5,
40 /* 0xe6 (breve) */ TABLE_TO_UCS4_COMBINING_E6,
41 /* 0xe7 (dot above) */ TABLE_TO_UCS4_COMBINING_E7,
42 /* 0xe8 (umlaut, diaeresis) */ TABLE_TO_UCS4_COMBINING_E8,
43 /* 0xe9 (caron, hacek) */ TABLE_TO_UCS4_COMBINING_E9,
44 /* 0xea (ring above) */ TABLE_TO_UCS4_COMBINING_EA,
45 /* 0xeb (ligature, left half) */ TABLE_TO_UCS4_COMBINING_EB,
46 /* 0xec (ligature, right half) */ TABLE_TO_UCS4_COMBINING_EC,
47 /* 0xed (comma above right) */ TABLE_TO_UCS4_COMBINING_ED,
48 /* 0xee (double acute) */ TABLE_TO_UCS4_COMBINING_EE,
49 /* 0xef (candrabindu) */ TABLE_TO_UCS4_COMBINING_EF,
50 /* 0xf0 (cedilla) */ TABLE_TO_UCS4_COMBINING_F0,
51 /* 0xf1 (ogonek, right hook) */ TABLE_TO_UCS4_COMBINING_F1,
52 /* 0xf2 (dot below) */ TABLE_TO_UCS4_COMBINING_F2,
53 /* 0xf3 (double dot below) */ TABLE_TO_UCS4_COMBINING_F3,
54 /* 0xf4 (ring below) */ TABLE_TO_UCS4_COMBINING_F4,
55 /* 0xf5 (double low line) */ TABLE_TO_UCS4_COMBINING_F5,
56 /* 0xf6 (line below) */ TABLE_TO_UCS4_COMBINING_F6,
57 /* 0xf7 (comma below, left hook) */ TABLE_TO_UCS4_COMBINING_F7,
58 /* 0xf8 (left half ring below, right cedilla) */ TABLE_TO_UCS4_COMBINING_F8,
59 /* 0xf9 (breve below, half circle below) */ TABLE_TO_UCS4_COMBINING_F9,
60 /* 0xfa (double tilde, left half) */ TABLE_TO_UCS4_COMBINING_FA,
61 /* 0xfb (double tilde, right half) */ TABLE_TO_UCS4_COMBINING_FB,
62 /* 0xfc */ TABLE_TO_UCS4_COMBINING_FC,
63 /* 0xfd */ TABLE_TO_UCS4_COMBINING_FD,
64 /* 0xfe (comma above, high centered comma) */ TABLE_TO_UCS4_COMBINING_FE,
/* Flag bit OR-ed into conv->istate next to the buffered diacritic byte.
   When ansi_z39_47_mbtowc finds it set on entry, the base character has
   already been output, and the buffered byte (istate & ~BASE_PASSED) is
   looked up in ansi_z39_47_2uni and stored through pwc. */
67 #define BASE_PASSED 0x10000
70 ansi_z39_47_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
73 ucs4_t last_wc = conv->istate;
75 if (last_wc & BASE_PASSED) {
76 /* base character was already output, reset the state and output the buffered diacritical mark */
78 unsigned char dc = (unsigned char)(last_wc & ~BASE_PASSED);
79 *pwc = ansi_z39_47_2uni[dc-0x80];
84 conv->istate |= BASE_PASSED;
88 if (last_wc && c >= 0x20) {
89 /* Check if we can combine the character with the diacritical mark */
90 unsigned char dc = (unsigned char)(last_wc & ~BASE_PASSED);
91 unsigned short wc = ansi_z39_47_2uni_comb[dc-0xe0][c-0x20];
102 unsigned short wc = ansi_z39_47_2uni[c-0x80];
109 /* Bytes in the range 0xe0 to 0xfe are diacritical marks.
110 Note that in ANSEL they come *before* the base character, whereas in
111 Unicode they come *after* it, so we have to buffer them ... */
112 conv->istate = (state_t)c;
113 return RET_TOOFEW(1);
/* Unicode-to-byte lookup tables used by ansi_z39_47_wctomb, one per handled
   code point range.  Each table is indexed by (wc - first code point of its
   range) and every entry holds two bytes.  A zero first byte means "no
   mapping"; an entry whose second byte is zero and whose first byte lies in
   0xe0..0xfe is treated by the encoder as a diacritical mark. */
118 static const unsigned char ansi_z39_47_page080[][2] = TABLE_FROM_UCS4_BASIC;
119 static const unsigned char ansi_z39_47_page01a[][2] = TABLE_FROM_UCS4_PAGE_01A;
120 static const unsigned char ansi_z39_47_page022[][2] = TABLE_FROM_UCS4_PAGE_022;
121 static const unsigned char ansi_z39_47_page02b[][2] = TABLE_FROM_UCS4_PAGE_02B;
122 static const unsigned char ansi_z39_47_page030[][2] = TABLE_FROM_UCS4_PAGE_030;
123 static const unsigned char ansi_z39_47_page1ea[][2] = TABLE_FROM_UCS4_PAGE_1EA;
124 static const unsigned char ansi_z39_47_page200[][2] = TABLE_FROM_UCS4_PAGE_200;
125 static const unsigned char ansi_z39_47_page211[][2] = TABLE_FROM_UCS4_PAGE_211;
126 static const unsigned char ansi_z39_47_page266[][2] = TABLE_FROM_UCS4_PAGE_266;
127 static const unsigned char ansi_z39_47_pagefe2[][2] = TABLE_FROM_UCS4_PAGE_FE2;
130 ansi_z39_47_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
132 const unsigned char* ch = NULL;
/* Append one byte to the output buffer.  Expands to several statements that
   use the names `output', `n' and `r' in scope at the point of use: it
   increments `output', returns RET_TOOSMALL from the enclosing function if
   that count exceeds n, and otherwise stores the byte through r and
   advances r.
   NOTE(review): the expansion is not wrapped in do { } while (0), so it
   must not be used as the sole body of an unbraced if/else. */
135 #define OUTPUT(c) ++output; if (n < output) return RET_TOOSMALL; *(r++) = (c);
137 /* Since in UTF-8 diacritical marks come after the base character and in
138 ANSEL before, we need to buffer possible base characters (0x20 to 0x7f)
139 to put the diacritical mark before it if there is one following */
142 OUTPUT(conv->ostate);
146 conv->ostate = (state_t) wc;
153 else if (wc >= 0x0080 && wc < 0x017f)
154 ch = ansi_z39_47_page080[wc-0x0080];
155 else if (wc >= 0x01a0 && wc < 0x01b4)
156 ch = ansi_z39_47_page01a[wc-0x01a0];
157 else if (wc >= 0x0220 && wc < 0x0234)
158 ch = ansi_z39_47_page022[wc-0x0220];
159 else if (wc >= 0x02b0 && wc < 0x02e2)
160 ch = ansi_z39_47_page02b[wc-0x02b0];
161 else if (wc >= 0x0300 && wc < 0x0337)
162 ch = ansi_z39_47_page030[wc-0x0300];
163 else if (wc >= 0x1ea0 && wc < 0x1efa)
164 ch = ansi_z39_47_page1ea[wc-0x1ea0];
165 else if (wc >= 0x2000 && wc < 0x200f)
166 ch = ansi_z39_47_page200[wc-0x2000];
167 else if (wc >= 0x2110 && wc < 0x211a)
168 ch = ansi_z39_47_page211[wc-0x2110];
169 else if (wc >= 0x2660 && wc < 0x2674)
170 ch = ansi_z39_47_page266[wc-0x2660];
171 else if (wc >= 0xfe20 && wc < 0xfe25)
172 ch = ansi_z39_47_pagefe2[wc-0xfe20];
173 if (ch && ch[0] != 0) {
174 if (ch[1] == 0 && ch[0] >= 0xe0 && ch[0] <= 0xfe) {
175 /* Diacritical mark following a base character, buffered in ostate */
176 /* Output diacritical mark, then base character */
179 OUTPUT(conv->ostate);
187 OUTPUT(conv->ostate);