--- /dev/null
+/* Utility functions for UTF-8
+ Copyright (C) 2001, 2002 Peter Verthez
+
+ Permission granted to do anything with this file that you want, as long
+ as the above copyright is retained in all copies.
+ THERE IS NO WARRANTY - USE AT YOUR OWN RISK
+*/
+
+/* $Id$ */
+/* $Name$ */
+
+#include "utf8.h"
+
+/* Checks whether STR is a well-formed UTF-8 string.
+
+   Parameters:
+     str  NUL-terminated byte string to check; may be NULL.
+
+   Returns 1 if every byte sequence up to the terminating NUL is
+   well-formed UTF-8 as defined by RFC 3629, 0 otherwise.  A NULL pointer
+   yields 0, an empty string yields 1.
+
+   Rejected in particular:
+     - continuation bytes (10xxxxxx) that do not follow a lead byte
+     - sequences cut short by another lead byte, an ASCII byte or the
+       end of the string
+     - overlong encodings (lead bytes 0xC0/0xC1, E0 80..9F, F0 80..8F)
+     - encoded UTF-16 surrogates (ED A0..BF)
+     - code points above U+10FFFF and the bytes 0xF5..0xFF
+*/
+int is_utf8_string(const char* str)
+{
+  /* Work on unsigned bytes so comparisons do not depend on the
+     signedness of plain char */
+  const unsigned char *p = (const unsigned char *)str;
+
+  if (!p) return 0;
+
+  while (*p) {
+    unsigned char lead = *p++;
+    /* Allowed range for the byte directly after the lead byte; some
+       lead bytes narrow it to exclude overlongs, surrogates and
+       values beyond U+10FFFF */
+    unsigned char lo = 0x80;
+    unsigned char hi = 0xBF;
+    int trail;
+
+    if (lead < 0x80) {
+      /* Plain ASCII character */
+      continue;
+    }
+    else if (lead >= 0xC2 && lead <= 0xDF) {
+      trail = 1;
+    }
+    else if (lead >= 0xE0 && lead <= 0xEF) {
+      trail = 2;
+      if (lead == 0xE0)
+        lo = 0xA0;              /* below this would be overlong */
+      else if (lead == 0xED)
+        hi = 0x9F;              /* above this would be a surrogate */
+    }
+    else if (lead >= 0xF0 && lead <= 0xF4) {
+      trail = 3;
+      if (lead == 0xF0)
+        lo = 0x90;              /* below this would be overlong */
+      else if (lead == 0xF4)
+        hi = 0x8F;              /* above this would exceed U+10FFFF */
+    }
+    else {
+      /* Stray continuation byte (0x80..0xBF), overlong lead
+         (0xC0, 0xC1) or a byte that never occurs (0xF5..0xFF) */
+      return 0;
+    }
+
+    /* First trailing byte, with the possibly narrowed range.  The
+       terminating NUL is below every range, so a truncated sequence
+       fails here without reading past the end of the string. */
+    if (*p < lo || *p > hi)
+      return 0;
+    p++;
+
+    /* Remaining trailing bytes must all look like 10xxxxxx */
+    while (--trail > 0) {
+      if ((*p & 0xC0) != 0x80)
+        return 0;
+      p++;
+    }
+  }
+
+  return 1;
+}
+
+/* Counts the characters (not the bytes) in a UTF-8 encoded string.
+
+   Every byte that is not a continuation byte (bit pattern 10xxxxxx) is
+   taken as the start of a character: ASCII bytes and multi-byte lead
+   bytes add one to the count, continuation bytes add nothing.
+
+   The string is not validated, so the result is never negative.
+   Returns 0 for a NULL pointer or an empty string.
+*/
+int utf8_strlen(const char* str)
+{
+  const char *cursor;
+  int count = 0;
+
+  if (!str)
+    return 0;
+
+  for (cursor = str; *cursor; cursor++) {
+    /* Only bytes whose two top bits are "10" are skipped */
+    if ((*cursor & 0xC0) != 0x80)
+      count++;
+  }
+
+  return count;
+}
+
-/* Encoding utility from UTF-8 to locale and vice versa
+/* Header file for UTF-8 functions
Copyright (C) 2001, 2002 Peter Verthez
Permission granted to do anything with this file that you want, as long
/* $Id$ */
/* $Name$ */
-#ifndef __UTF8_LOCALE_H
-#define __UTF8_LOCALE_H
+#ifndef __UTF8_H
+#define __UTF8_H
#ifdef __cplusplus
extern "C" {
#endif
+ /* Returns the number of UTF-8 characters (not bytes) in the string:
+ every byte that is not a continuation byte (10xxxxxx) is counted.
+ Returns 0 for a NULL pointer.
+ NOTE(review): the implementation does not validate its input and
+ never returns -1 for invalid UTF-8; use is_utf8_string() first if
+ validity matters -- confirm that no caller relies on a -1 result. */
+int utf8_strlen(const char* input);
+
+ /* Returns 1 if string is a valid UTF-8 string, 0 otherwise (a NULL
+ pointer also yields 0) */
+int is_utf8_string(const char* input);
+
void convert_set_unknown(const char* unknown);
char* convert_utf8_to_locale(const char* input, int *conv_fails);
char* convert_locale_to_utf8(const char* input);
}
#endif
-#endif /* __UTF8_LOCALE_H */
+#endif /* __UTF8_H */