#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <wchar.h>
#include <glib.h>
#include <glib-object.h>
#include "debug.h"
/* Structures and whatnot for tracking character classes. */
struct char_class_data {
- wchar_t c; /* A character. */
+ gunichar c; /* A character. */
int i; /* An integer. */
char *s; /* A string. */
int inc; /* An increment value. */
gboolean multiple; /* Whether a sequence of multiple
characters in this class should be
counted together. */
- wchar_t *code; /* A magic string that indicates this
+ gunichar *code; /* A magic string that indicates this
class should be found here. */
size_t code_length;
size_t ccount; /* The maximum number of characters
after the format specifier to
consume. */
- gboolean (*check)(const wchar_t c, struct char_class_data *data);
+ gboolean (*check)(const gunichar c, struct char_class_data *data);
/* Function to check if a character
is in this class. */
- void (*setup)(const wchar_t *s, struct char_class_data *data, int inc);
+ void (*setup)(const gunichar *s, struct char_class_data *data, int inc);
/* Setup the data struct for use in the
* above check function. */
- gboolean (*extract)(const wchar_t *s, size_t length,
+ gboolean (*extract)(const gunichar *s, size_t length,
struct char_class_data *data,
GValueArray *array);
/* Extract a parameter. */
/* Functions for checking if a particular character is part of a class, and
* for setting up a structure for use when determining matches. */
+/* Exact-match class check: report TRUE only when c is equal to the single
+ * character stored in data->c, and FALSE for every other character. */
static gboolean
-char_class_exact_check(wchar_t c, struct char_class_data *data)
+char_class_exact_check(gunichar c, struct char_class_data *data)
{
return (c == data->c) ? TRUE : FALSE;
}
+/* Prepare data for char_class_exact_check(): remember the first character of
+ * s in data->c.  The inc argument is accepted but not used here. */
static void
-char_class_exact_setup(const wchar_t *s, struct char_class_data *data, int inc)
+char_class_exact_setup(const gunichar *s, struct char_class_data *data, int inc)
{
data->c = s[0];
return;
}
+/* Prepare data for matching a literal percent sign: data->c is always set to
+ * '%'.  Neither s nor inc is consulted. */
static void
-char_class_percent_setup(const wchar_t *s, struct char_class_data *data,
+char_class_percent_setup(const gunichar *s, struct char_class_data *data,
int inc)
{
data->c = '%';
return;
}
+/* Extraction callback for classes that yield no parameter: it ignores all of
+ * its arguments, leaves array untouched, and always returns FALSE. */
static gboolean
-char_class_none_extract(const wchar_t *s, size_t length,
+char_class_none_extract(const gunichar *s, size_t length,
struct char_class_data *data, GValueArray *array)
{
return FALSE;
}
static gboolean
-char_class_digit_check(wchar_t c, struct char_class_data *data)
+char_class_digit_check(gunichar c, struct char_class_data *data)
{
switch (c) {
case '0':
return FALSE;
}
+/* Prepare data for the digit class: record the increment value in data->inc.
+ * The pattern text s is not consulted. */
static void
-char_class_digit_setup(const wchar_t *s, struct char_class_data *data, int inc)
+char_class_digit_setup(const gunichar *s, struct char_class_data *data, int inc)
{
data->inc = inc;
return;
}
static gboolean
-char_class_digit_extract(const wchar_t *s, size_t length,
+char_class_digit_extract(const gunichar *s, size_t length,
struct char_class_data *data, GValueArray *array)
{
long ret = 0;
}
static gboolean
-char_class_multi_check(wchar_t c, struct char_class_data *data)
+char_class_multi_check(gunichar c, struct char_class_data *data)
{
switch (c) {
case '0':
return FALSE;
}
+/* Prepare data for the multi class: record the increment value in data->inc.
+ * The pattern text s is not consulted. */
static void
-char_class_multi_setup(const wchar_t *s, struct char_class_data *data, int inc)
+char_class_multi_setup(const gunichar *s, struct char_class_data *data, int inc)
{
data->inc = inc;
return;
}
static gboolean
-char_class_multi_extract(const wchar_t *s, size_t length,
+char_class_multi_extract(const gunichar *s, size_t length,
struct char_class_data *data, GValueArray *array)
{
long ret = 0;
}
+/* "Any" class check: report TRUE when c is greater than or equal to the
+ * threshold character stored in data->c, FALSE otherwise. */
static gboolean
-char_class_any_check(wchar_t c, struct char_class_data *data)
+char_class_any_check(gunichar c, struct char_class_data *data)
{
return (c >= data->c) ? TRUE : FALSE;
}
+/* Prepare data for char_class_any_check(): the threshold stored in data->c is
+ * the first character of s offset by the increment value inc. */
static void
-char_class_any_setup(const wchar_t *s, struct char_class_data *data, int inc)
+char_class_any_setup(const gunichar *s, struct char_class_data *data, int inc)
{
data->c = s[0] + inc;
return;
}
static gboolean
-char_class_any_extract(const wchar_t *s, size_t length,
+char_class_any_extract(const gunichar *s, size_t length,
struct char_class_data *data, GValueArray *array)
{
long ret = 0;
}
+/* String class check: report TRUE for every character that differs from the
+ * one stored in data->c, and FALSE for that stored character itself. */
static gboolean
-char_class_string_check(wchar_t c, struct char_class_data *data)
+char_class_string_check(gunichar c, struct char_class_data *data)
{
return (c != data->c) ? TRUE : FALSE;
}
+/* Prepare data for char_class_string_check(): remember the first character of
+ * s in data->c.  The inc argument is accepted but not used here. */
static void
-char_class_string_setup(const wchar_t *s, struct char_class_data *data, int inc)
+char_class_string_setup(const gunichar *s, struct char_class_data *data, int inc)
{
data->c = s[0];
return;
}
static size_t
-xwcsnlen(const wchar_t *s, size_t length)
+unichar_snlen(const gunichar *s, size_t length)
{
size_t i;
for (i = 0; i < length; i++) {
}
return length;
}
+/* Copy at most length characters from s to d, stopping right after the first
+ * zero character has been copied.  Nothing is written past that point: the
+ * rest of d is not zero-filled, and d is left unterminated when s holds
+ * length or more non-zero characters, so callers must provide termination
+ * themselves (e.g. by zero-allocating d one element larger). */
+static void
+unichar_sncpy(gunichar *d, const gunichar *s, size_t length)
+{
+	/* The index has the same type as length so the loop comparison is
+	 * never a signed/unsigned mix and cannot overflow before length. */
+	size_t i;
+	for (i = 0; i < length; i++) {
+		d[i] = s[i];
+		if (s[i] == 0) {
+			break;
+		}
+	}
+}
+/* Compare up to length characters of a and b, stopping early at a shared
+ * zero terminator.  Returns 0 when the compared ranges are equal, a negative
+ * value when the first differing character of a is smaller than that of b,
+ * and a positive value when it is larger. */
+static int
+unichar_sncmp(const gunichar *a, const gunichar *b, size_t length)
+{
+	size_t i;
+	for (i = 0; i < length; i++) {
+		if (a[i] != b[i]) {
+			/* Compare instead of subtracting: the operands are
+			 * unsigned, so a difference converted to int can
+			 * carry the wrong sign. */
+			return (a[i] < b[i]) ? -1 : 1;
+		}
+		if (a[i] == 0) {
+			break;
+		}
+	}
+	return 0;
+}
static gboolean
-char_class_string_extract(const wchar_t *s, size_t length,
+char_class_string_extract(const gunichar *s, size_t length,
struct char_class_data *data, GValueArray *array)
{
- wchar_t *ret = NULL;
+ gunichar *ret = NULL;
size_t len;
GValue value;
- len = xwcsnlen(s, length);
- ret = g_malloc0((len + 1) * sizeof(wchar_t));
- wcsncpy(ret, s, len);
+ len = unichar_snlen(s, length);
+ ret = g_malloc0((len + 1) * sizeof(gunichar));
+ unichar_sncpy(ret, s, len);
#ifdef VTE_DEBUG
if (vte_debug_on(VTE_DEBUG_PARSE)) {
fprintf(stderr, "Extracting string `%ls'.\n", ret);
return TRUE;
}
-static wchar_t empty_wstring[] = {'\0'};
-static wchar_t digit_wstring1[] = {'%', '2', '\0'};
-static wchar_t digit_wstring2[] = {'%', 'd', '\0'};
-static wchar_t any_wstring[] = {'%', '+', '\0'};
-static wchar_t exact_wstring[] = {'%', '%', '\0'};
-static wchar_t string_wstring[] = {'%', 's', '\0'};
-static wchar_t multi_wstring[] = {'%', 'm', '\0'};
+static gunichar empty_wstring[] = {'\0'};
+static gunichar digit_wstring1[] = {'%', '2', '\0'};
+static gunichar digit_wstring2[] = {'%', 'd', '\0'};
+static gunichar any_wstring[] = {'%', '+', '\0'};
+static gunichar exact_wstring[] = {'%', '%', '\0'};
+static gunichar string_wstring[] = {'%', 's', '\0'};
+static gunichar multi_wstring[] = {'%', 'm', '\0'};
static struct char_class char_classes[] = {
{exact, FALSE, empty_wstring, 0, 1,
/* Add the given pattern, with its own result string, to the trie, with the
* given initial increment value. */
static void
-vte_trie_addx(struct vte_trie *trie, wchar_t *pattern, size_t length,
+vte_trie_addx(struct vte_trie *trie, gunichar *pattern, size_t length,
const char *result, GQuark quark, int inc)
{
unsigned long i;
struct char_class *cclass = NULL;
struct char_class_data data;
- wchar_t *code;
+ gunichar *code;
size_t len = 0, ccount = 0;
- wchar_t inc_wstring[] = {'%', 'i', '\0'};
+ gunichar inc_wstring[] = {'%', 'i', '\0'};
/* The trivial case -- we'll just set the result at this node. */
if (length == 0) {
/* If this part of the control sequence indicates incrementing a
* parameter, keep track of the incrementing, skip over the increment
* substring, and keep going. */
- if ((length >= 2) && (wcsncmp(pattern, inc_wstring, 2) == 0)) {
+ if ((length >= 2) && (unichar_sncmp(pattern, inc_wstring, 2) == 0)) {
vte_trie_addx(trie, pattern + 2, length - 2,
result, quark, inc + 1);
return;
len = char_classes[i].code_length;
code = char_classes[i].code;
ccount = char_classes[i].ccount;
- if ((len <= length) && (wcsncmp(pattern, code, len) == 0)) {
+ if ((len <= length) && (unichar_sncmp(pattern, code, len) == 0)) {
cclass = &char_classes[i];
break;
}
vte_trie_add(struct vte_trie *trie, const char *pattern, size_t length,
const char *result, GQuark quark)
{
- mbstate_t state;
char *wpattern, *wpattern_end, *tpattern;
GIConv conv;
size_t wlength;
quark = g_quark_from_string(result);
}
- wlength = sizeof(wchar_t) * (length + 1);
+ wlength = sizeof(gunichar) * (length + 1);
wpattern = wpattern_end = g_malloc0(wlength + 1);
- memset(&state, 0, sizeof(state));
- conv = g_iconv_open("WCHAR_T", "UTF-8");
+ conv = g_iconv_open(vte_trie_wide_encoding(), "UTF-8");
g_assert(conv != ((GIConv) -1));
tpattern = (char*)pattern;
g_iconv(conv, &tpattern, &length, &wpattern_end, &wlength);
if (length == 0) {
- wlength = (wpattern_end - wpattern) / sizeof(wchar_t);
- vte_trie_addx(trie, (wchar_t*)wpattern, wlength,
+ wlength = (wpattern_end - wpattern) / sizeof(gunichar);
+ vte_trie_addx(trie, (gunichar*)wpattern, wlength,
result, quark, 0);
}
g_iconv_close(conv);
* empty string on a partial initial match, a NULL if there's no match in the
* works, and the result string if we have an exact match. */
static const char *
-vte_trie_matchx(struct vte_trie *trie, const wchar_t *pattern, size_t length,
+vte_trie_matchx(struct vte_trie *trie, const gunichar *pattern, size_t length,
gboolean greedy,
- const char **res, const wchar_t **consumed,
+ const char **res, const gunichar **consumed,
GQuark *quark, GValueArray *array)
{
unsigned int i;
const char *best = NULL;
GValueArray *bestarray = NULL;
GQuark bestquark = 0;
- const wchar_t *bestconsumed = pattern;
+ const gunichar *bestconsumed = pattern;
/* Make sure that attempting to save output values doesn't kill us. */
if (res == NULL) {
if (trie->trie_paths[i].cclass->type == cc) {
/* If it matches this character class... */
if (cclass->check(pattern[0], data)) {
- const wchar_t *prospect = pattern + 1;
+ const gunichar *prospect = pattern + 1;
const char *tmp;
GQuark tmpquark = 0;
GValueArray *tmparray;
g_value_array_free(bestarray);
}
#if 0
- g_print("`%s' ", best);
+ printf("`%s' ", best);
dump_array(array);
#endif
*quark = bestquark;
struct vte_trie_table *table;
enum cclass cc;
int i;
- wchar_t c;
+ gunichar c;
/* Free the precomputed table (if there is one). */
if (trie->table != NULL) {
* empty string on a partial initial match, a NULL if there's no match in the
* works, and the result string if we have an exact match. */
TRIE_MAYBE_STATIC const char *
-vte_trie_match(struct vte_trie *trie, const wchar_t *pattern, size_t length,
- const char **res, const wchar_t **consumed,
+vte_trie_match(struct vte_trie *trie, const gunichar *pattern, size_t length,
+ const char **res, const gunichar **consumed,
GQuark *quark, GValueArray **array)
{
const char *ret = NULL;
GQuark tmpquark;
GValueArray *valuearray;
GValue *value;
- const wchar_t *dummyconsumed;
+ const gunichar *dummyconsumed;
gpointer ptr;
gboolean greedy = FALSE;
int i;
break;
}
if (trie->trie_paths[i].trie->result != NULL) {
- g_print("%s = `%s'\n", buf,
+ printf("%s = `%s'\n", buf,
trie->trie_paths[i].trie->result);
}
vte_trie_printx(trie->trie_paths[i].trie, buf, nodecount);
{
size_t nodecount = 0;
vte_trie_printx(trie, "", &nodecount);
- g_print("Trie has %ld nodes.\n", (long) nodecount);
+ printf("Trie has %ld nodes.\n", (long) nodecount);
+}
+
+#define SAMPLE "ABCDEF"
+/* Search list (length entries) for the first iconv encoding name whose
+ * conversion of plain ASCII text from UTF-8 reproduces, byte for byte, the
+ * in-memory layout of a gunichar array (wide == TRUE) or of an unsigned char
+ * array (wide == FALSE).
+ *
+ * Returns a newly-allocated copy of the matching name, or NULL when no entry
+ * can be opened by g_iconv_open() or none produces the expected bytes. */
+static char *
+vte_trie_find_valid_encoding(char **list, size_t length, gboolean wide)
+{
+	gunichar wbuffer[8];
+	unsigned char nbuffer[8];
+	const void *buffer;
+	size_t buffer_size;
+	char inbuf[BUFSIZ];
+	char outbuf[BUFSIZ];
+	char *ibuf, *obuf;
+	gsize isize, osize;
+	size_t i;
+	gsize outbytes;
+	GIConv conv;
+
+	/* Pick the reference representation the converter output has to
+	 * match, and remember how many bytes of it may be inspected. */
+	if (wide) {
+		buffer = wbuffer;
+		buffer_size = sizeof(wbuffer);
+	} else {
+		buffer = nbuffer;
+		buffer_size = sizeof(nbuffer);
+	}
+
+	/* Fill both reference buffers with the sample text, including its
+	 * terminating zero. */
+	for (i = 0; SAMPLE[i] != '\0'; i++) {
+		wbuffer[i] = nbuffer[i] = SAMPLE[i];
+	}
+	wbuffer[i] = nbuffer[i] = SAMPLE[i];
+
+	for (i = 0; i < length; i++) {
+		conv = g_iconv_open(list[i], "UTF-8");
+		if (conv == ((GIConv) -1)) {
+#ifdef VTE_DEBUG
+			if (vte_debug_on(VTE_DEBUG_MISC)) {
+				fprintf(stderr, "Conversions to `%s' are not "
+					"supported by giconv.\n", list[i]);
+			}
+#endif
+			continue;
+		}
+
+		/* Only the first three sample characters are converted. */
+		ibuf = inbuf;
+		strcpy(inbuf, SAMPLE);
+		isize = 3;
+		obuf = outbuf;
+		osize = sizeof(outbuf);
+
+		g_iconv(conv, &ibuf, &isize, &obuf, &osize);
+		g_iconv_close(conv);
+
+		/* Accept the encoding only if all input was consumed and the
+		 * output matches the reference.  Output longer than the
+		 * reference buffer can never match and must not be compared,
+		 * since that would read past the end of the buffer. */
+		outbytes = sizeof(outbuf) - osize;
+		if ((isize == 0) && (outbytes > 0) &&
+		    (outbytes <= buffer_size)) {
+			if (memcmp(outbuf, buffer, outbytes) == 0) {
+#ifdef VTE_DEBUG
+				if (vte_debug_on(VTE_DEBUG_MISC)) {
+					fprintf(stderr, "Found iconv target "
+						"`%s'.\n", list[i]);
+				}
+#endif
+				return g_strdup(list[i]);
+			}
+		}
+	}
+
+	return NULL;
+}
+
+/* Return the name of the iconv encoding to use for gunichar data.  The
+ * candidates below are probed in order with vte_trie_find_valid_encoding()
+ * and the first usable one is remembered for later calls.  The result is
+ * NULL when no candidate is usable, in which case the probe is repeated on
+ * the next call. */
+TRIE_MAYBE_STATIC const char *
+vte_trie_wide_encoding()
+{
+	static char *cached = NULL;
+	char *candidates[] = {
+		"10646",
+		"ISO_10646",
+		"ISO-10646",
+		"ISO10646",
+		"ISO-10646-1",
+		"ISO10646-1",
+		"ISO-10646/UCS4",
+		"UCS-4",
+		"UCS4",
+		"UCS-4-BE",
+		"UCS-4BE",
+		"UCS4-BE",
+		"UCS-4-INTERNAL",
+		"UCS-4-LE",
+		"UCS-4LE",
+		"UCS4-LE",
+		"UNICODE",
+		"UNICODE-BIG",
+		"UNICODEBIG",
+		"UNICODE-LITTLE",
+		"UNICODELITTLE",
+		"WCHAR_T",
+	};
+
+	if (cached != NULL) {
+		return cached;
+	}
+	cached = vte_trie_find_valid_encoding(candidates,
+					      G_N_ELEMENTS(candidates),
+					      TRUE);
+	return cached;
+}
+
+/* Return the name of the iconv encoding to use for single-byte data.  The
+ * candidates below are probed in order with vte_trie_find_valid_encoding()
+ * and the first usable one is remembered for later calls.  The result is
+ * NULL when no candidate is usable, in which case the probe is repeated on
+ * the next call. */
+TRIE_MAYBE_STATIC const char *
+vte_trie_narrow_encoding()
+{
+	static char *cached = NULL;
+	char *candidates[] = {
+		"8859-1",
+		"ISO-8859-1",
+		"ISO8859-1",
+	};
+
+	if (cached != NULL) {
+		return cached;
+	}
+	cached = vte_trie_find_valid_encoding(candidates,
+					      G_N_ELEMENTS(candidates),
+					      FALSE);
+	return cached;
}
#ifdef TRIE_MAIN
{
unsigned int i;
if (array != NULL) {
- g_print("args = {");
+ printf("args = {");
for (i = 0; i < array->n_values; i++) {
GValue *value;
value = g_value_array_get_nth(array, i);
if (i > 0) {
- g_print(", ");
+ printf(", ");
}
if (G_VALUE_HOLDS_LONG(value)) {
- g_print("%ld", g_value_get_long(value));
+ printf("%ld", g_value_get_long(value));
}
if (G_VALUE_HOLDS_STRING(value)) {
- g_print("`%s'", g_value_get_string(value));
+ printf("`%s'", g_value_get_string(value));
}
if (G_VALUE_HOLDS_POINTER(value)) {
printf("`%ls'",
- (wchar_t*)g_value_get_pointer(value));
+ (gunichar*)g_value_get_pointer(value));
}
}
- g_print("}\n");
+ printf("}\n");
}
}
+/* Convert the ilen bytes of UTF-8 text at i into gunichars stored at o, a
+ * buffer of max_olen bytes which is zeroed first, and report the number of
+ * characters produced through olen. */
static void
convert_mbstowcs(const char *i, size_t ilen,
- wchar_t *o, size_t *olen, size_t max_olen)
+ gunichar *o, size_t *olen, size_t max_olen)
{
GIConv conv;
size_t outlen;
- conv = g_iconv_open("WCHAR_T", "UTF-8");
+ conv = g_iconv_open(vte_trie_wide_encoding(), "UTF-8");
g_assert(conv != ((GIConv) -1));
memset(o, 0, max_olen);
+ /* g_iconv() reads outlen as the space left in o and decrements it, so
+  * it has to start out as the full buffer size. */
+ outlen = max_olen;
g_iconv(conv, (char**)&i, &ilen, (char**)&o, &outlen);
g_iconv_close(conv);
- *olen = (max_olen - outlen) / sizeof(wchar_t);
+ *olen = (max_olen - outlen) / sizeof(gunichar);
}
int
struct vte_trie *trie;
GValueArray *array = NULL;
GQuark quark;
- wchar_t buf[LINE_MAX];
- const wchar_t *consumed;
+ gunichar buf[LINE_MAX];
+ const gunichar *consumed;
size_t buflen;
+ vte_debug_parse_string(getenv("VTE_DEBUG_FLAGS"));
+
g_type_init();
trie = vte_trie_new();
vte_trie_precompute(trie);
}
}
+
+ printf("Wide encoding is `%s'.\n", vte_trie_wide_encoding());
+ printf("Narrow encoding is `%s'.\n", vte_trie_narrow_encoding());
+
vte_trie_print(trie);
- g_print("\n");
+ printf("\n");
quark = 0;
convert_mbstowcs("abc", 3, buf, &buflen, sizeof(buf));
- g_print("`%s' = `%s'\n", "abc",
+ printf("`%s' = `%s'\n", "abc",
vte_trie_match(trie, buf, buflen,
NULL, &consumed, &quark, &array));
- g_print("=> `%s' (%d)\n", g_quark_to_string(quark), consumed - buf);
+ printf("=> `%s' (%d)\n", g_quark_to_string(quark), consumed - buf);
if (array != NULL) {
dump_array(array);
g_value_array_free(array);
quark = 0;
convert_mbstowcs("abcdef", 6, buf, &buflen, sizeof(buf));
- g_print("`%s' = `%s'\n", "abcdef",
+ printf("`%s' = `%s'\n", "abcdef",
vte_trie_match(trie, buf, buflen,
NULL, &consumed, &quark, &array));
- g_print("=> `%s' (%d)\n", g_quark_to_string(quark), consumed - buf);
+ printf("=> `%s' (%d)\n", g_quark_to_string(quark), consumed - buf);
if (array != NULL) {
dump_array(array);
g_value_array_free(array);
quark = 0;
convert_mbstowcs("abcde", 5, buf, &buflen, sizeof(buf));
- g_print("`%s' = `%s'\n", "abcde",
+ printf("`%s' = `%s'\n", "abcde",
vte_trie_match(trie, buf, buflen,
NULL, &consumed, &quark, &array));
- g_print("=> `%s' (%d)\n", g_quark_to_string(quark), consumed - buf);
+ printf("=> `%s' (%d)\n", g_quark_to_string(quark), consumed - buf);
if (array != NULL) {
dump_array(array);
g_value_array_free(array);
quark = 0;
convert_mbstowcs("abcdeg", 6, buf, &buflen, sizeof(buf));
- g_print("`%s' = `%s'\n", "abcdeg",
+ printf("`%s' = `%s'\n", "abcdeg",
vte_trie_match(trie, buf, buflen,
NULL, &consumed, &quark, &array));
- g_print("=> `%s' (%d)\n", g_quark_to_string(quark), consumed - buf);
+ printf("=> `%s' (%d)\n", g_quark_to_string(quark), consumed - buf);
if (array != NULL) {
dump_array(array);
g_value_array_free(array);
quark = 0;
convert_mbstowcs("abc%deg", 7, buf, &buflen, sizeof(buf));
- g_print("`%s' = `%s'\n", "abc%deg",
+ printf("`%s' = `%s'\n", "abc%deg",
vte_trie_match(trie, buf, buflen,
NULL, &consumed, &quark, &array));
- g_print("=> `%s' (%d)\n", g_quark_to_string(quark), consumed - buf);
+ printf("=> `%s' (%d)\n", g_quark_to_string(quark), consumed - buf);
if (array != NULL) {
dump_array(array);
g_value_array_free(array);
quark = 0;
convert_mbstowcs("abc10eg", 7, buf, &buflen, sizeof(buf));
- g_print("`%s' = `%s'\n", "abc10eg",
+ printf("`%s' = `%s'\n", "abc10eg",
vte_trie_match(trie, buf, buflen,
NULL, &consumed, &quark, &array));
- g_print("=> `%s' (%d)\n", g_quark_to_string(quark), consumed - buf);
+ printf("=> `%s' (%d)\n", g_quark_to_string(quark), consumed - buf);
if (array != NULL) {
dump_array(array);
g_value_array_free(array);
quark = 0;
convert_mbstowcs("abc%eg", 6, buf, &buflen, sizeof(buf));
- g_print("`%s' = `%s'\n", "abc%eg",
+ printf("`%s' = `%s'\n", "abc%eg",
vte_trie_match(trie, buf, buflen,
NULL, &consumed, &quark, &array));
- g_print("=> `%s' (%d)\n", g_quark_to_string(quark), consumed - buf);
+ printf("=> `%s' (%d)\n", g_quark_to_string(quark), consumed - buf);
if (array != NULL) {
dump_array(array);
g_value_array_free(array);
quark = 0;
convert_mbstowcs("abc%10eg", 8, buf, &buflen, sizeof(buf));
- g_print("`%s' = `%s'\n", "abc%10eg",
+ printf("`%s' = `%s'\n", "abc%10eg",
vte_trie_match(trie, buf, buflen,
NULL, &consumed, &quark, &array));
- g_print("=> `%s' (%d)\n", g_quark_to_string(quark), consumed - buf);
+ printf("=> `%s' (%d)\n", g_quark_to_string(quark), consumed - buf);
if (array != NULL) {
dump_array(array);
g_value_array_free(array);
quark = 0;
convert_mbstowcs("abcBeg", 6, buf, &buflen, sizeof(buf));
- g_print("`%s' = `%s'\n", "abcBeg",
+ printf("`%s' = `%s'\n", "abcBeg",
vte_trie_match(trie, buf, buflen,
NULL, &consumed, &quark, &array));
- g_print("=> `%s' (%d)\n", g_quark_to_string(quark), consumed - buf);
+ printf("=> `%s' (%d)\n", g_quark_to_string(quark), consumed - buf);
if (array != NULL) {
dump_array(array);
g_value_array_free(array);
quark = 0;
convert_mbstowcs("<esc>[25;26H", 12, buf, &buflen, sizeof(buf));
- g_print("`%s' = `%s'\n", "<esc>[25;26H",
+ printf("`%s' = `%s'\n", "<esc>[25;26H",
vte_trie_match(trie, buf, buflen,
NULL, &consumed, &quark, &array));
- g_print("=> `%s' (%d)\n", g_quark_to_string(quark), consumed - buf);
+ printf("=> `%s' (%d)\n", g_quark_to_string(quark), consumed - buf);
if (array != NULL) {
dump_array(array);
g_value_array_free(array);
quark = 0;
convert_mbstowcs("<esc>[25;2", 10, buf, &buflen, sizeof(buf));
- g_print("`%s' = `%s'\n", "<esc>[25;2",
+ printf("`%s' = `%s'\n", "<esc>[25;2",
vte_trie_match(trie, buf, buflen,
NULL, &consumed, &quark, &array));
- g_print("=> `%s' (%d)\n", g_quark_to_string(quark), consumed - buf);
+ printf("=> `%s' (%d)\n", g_quark_to_string(quark), consumed - buf);
if (array != NULL) {
dump_array(array);
g_value_array_free(array);
quark = 0;
convert_mbstowcs("<esc>[25L", 9, buf, &buflen, sizeof(buf));
- g_print("`%s' = `%s'\n", "<esc>[25L",
+ printf("`%s' = `%s'\n", "<esc>[25L",
vte_trie_match(trie, buf, buflen,
NULL, &consumed, &quark, &array));
- g_print("=> `%s' (%d)\n", g_quark_to_string(quark), consumed - buf);
+ printf("=> `%s' (%d)\n", g_quark_to_string(quark), consumed - buf);
if (array != NULL) {
dump_array(array);
g_value_array_free(array);
quark = 0;
convert_mbstowcs("<esc>[25L<esc>[24L", 18, buf, &buflen, sizeof(buf));
- g_print("`%s' = `%s'\n", "<esc>[25L<esc>[24L",
+ printf("`%s' = `%s'\n", "<esc>[25L<esc>[24L",
vte_trie_match(trie, buf, buflen,
NULL, &consumed, &quark, &array));
- g_print("=> `%s' (%d)\n", g_quark_to_string(quark), consumed - buf);
+ printf("=> `%s' (%d)\n", g_quark_to_string(quark), consumed - buf);
if (array != NULL) {
dump_array(array);
g_value_array_free(array);
quark = 0;
convert_mbstowcs("<esc>[25;26L", 12, buf, &buflen, sizeof(buf));
- g_print("`%s' = `%s'\n", "<esc>[25;26L",
+ printf("`%s' = `%s'\n", "<esc>[25;26L",
vte_trie_match(trie, buf, buflen,
NULL, &consumed, &quark, &array));
- g_print("=> `%s' (%d)\n", g_quark_to_string(quark), consumed - buf);
+ printf("=> `%s' (%d)\n", g_quark_to_string(quark), consumed - buf);
if (array != NULL) {
dump_array(array);
g_value_array_free(array);
quark = 0;
convert_mbstowcs("<esc>]2;WoofWoofh", 17, buf, &buflen, sizeof(buf));
- g_print("`%s' = `%s'\n", "<esc>]2;WoofWoofh",
+ printf("`%s' = `%s'\n", "<esc>]2;WoofWoofh",
vte_trie_match(trie, buf, buflen,
NULL, &consumed, &quark, &array));
- g_print("=> `%s' (%d)\n", g_quark_to_string(quark), consumed - buf);
+ printf("=> `%s' (%d)\n", g_quark_to_string(quark), consumed - buf);
if (array != NULL) {
dump_array(array);
g_value_array_free(array);
quark = 0;
convert_mbstowcs("<esc>]2;WoofWoofh<esc>]2;WoofWoofh", 34,
buf, &buflen, sizeof(buf));
- g_print("`%s' = `%s'\n", "<esc>]2;WoofWoofh<esc>]2;WoofWoofh",
+ printf("`%s' = `%s'\n", "<esc>]2;WoofWoofh<esc>]2;WoofWoofh",
vte_trie_match(trie, buf, buflen,
NULL, &consumed, &quark, &array));
- g_print("=> `%s' (%d)\n", g_quark_to_string(quark), consumed - buf);
+ printf("=> `%s' (%d)\n", g_quark_to_string(quark), consumed - buf);
if (array != NULL) {
dump_array(array);
g_value_array_free(array);
quark = 0;
convert_mbstowcs("<esc>]2;WoofWoofhfoo", 20, buf, &buflen, sizeof(buf));
- g_print("`%s' = `%s'\n", "<esc>]2;WoofWoofhfoo",
+ printf("`%s' = `%s'\n", "<esc>]2;WoofWoofhfoo",
vte_trie_match(trie, buf, buflen,
NULL, &consumed, &quark, &array));
- g_print("=> `%s' (%d)\n", g_quark_to_string(quark), consumed - buf);
+ printf("=> `%s' (%d)\n", g_quark_to_string(quark), consumed - buf);
if (array != NULL) {
dump_array(array);
g_value_array_free(array);
#include <termios.h>
#include <unistd.h>
#include <wchar.h>
-#include <wctype.h>
#include <glib.h>
#include <glib-object.h>
#include <gdk/gdk.h>
/* The structure we use to hold characters we're supposed to display -- this
* includes any supported visible attributes. */
struct vte_charcell {
- wchar_t c; /* The wide character. */
+ gunichar c; /* The Unicode character. */
guint16 columns: 2; /* Number of visible columns (as determined
- by wcwidth(c)). */
+ by g_unichar_iswide(c)). */
guint16 fore: 5; /* Indices in the color palette for the */
guint16 back: 5; /* foreground and background of the cell. */
guint16 standout: 1; /* Single-bit attributes. */
typedef struct _VteScreen VteScreen;
typedef struct _VteWordCharRange {
- wchar_t start, end;
+ gunichar start, end;
} VteWordCharRange;
/* Terminal private data. */
const char *gxencoding[4]; /* alternate encodings */
/* Input data queues. */
- GIConv incoming_conv; /* narrow/wide conversion state */
+ GIConv incoming_conv; /* narrow/unichar conversion state */
unsigned char *incoming; /* pending output characters */
size_t n_incoming;
gboolean processing;
static void vte_terminal_set_termcap(VteTerminal *terminal, const char *path,
gboolean reset);
static void vte_terminal_ensure_cursor(VteTerminal *terminal, gboolean current);
-static void vte_terminal_insert_char(GtkWidget *widget, wchar_t c,
+static void vte_terminal_insert_char(GtkWidget *widget, gunichar c,
gboolean force_insert);
static void vte_sequence_handler_clear_screen(VteTerminal *terminal,
const char *match,
return g_array_new(FALSE, FALSE, sizeof(struct vte_charcell));
}
+/* Estimate the number of columns c occupies: 2 when GLib classifies it as a
+ * wide character, 1 for any other defined character, and -1 when the
+ * character is not defined at all. */
+static ssize_t
+vte_unichar_width(gunichar c)
+{
+	if (!g_unichar_isdefined(c)) {
+		return -1;
+	}
+	if (g_unichar_iswide(c)) {
+		return 2;
+	}
+	return 1;
+}
+
+/* Count the unichars that precede the terminating zero in c.  This is a
+ * plain linear scan over the string. */
+static ssize_t
+vte_unicode_strlen(gunichar *c)
+{
+	int count = 0;
+	while (c[count] != 0) {
+		count++;
+	}
+	return count;
+}
+
+/* Convert a gunichar to a wchar_t for use with X.  The value is simply cast,
+ * which is only done when the compiler defines __STDC_ISO_10646__; on any
+ * other platform the build is stopped with an error. */
+static wchar_t
+vte_wc_from_unichar(gunichar c)
+{
+#ifndef __STDC_ISO_10646__
+#error "Don't know how to convert from gunichar to wchar_t!"
+#endif
+	return (wchar_t) c;
+}
+
/* Reset defaults for character insertion. */
static void
vte_terminal_set_default_attributes(VteTerminal *terminal)
size_t icount, ocount;
old_codeset = terminal->pvt->encoding;
-
if (codeset == NULL) {
codeset = nl_langinfo(CODESET);
}
/* Open new conversions. */
- new_iconv = g_iconv_open("WCHAR_T", codeset);
- new_oconvw = g_iconv_open(codeset, "WCHAR_T");
- new_oconvu = g_iconv_open(codeset, "UTF-8");
+ new_iconv = g_iconv_open(vte_trie_wide_encoding(), codeset);
if (new_iconv == ((GIConv) -1)) {
g_warning(_("Unable to convert characters from %s to %s."),
- codeset, "WCHAR_T");
- return;
+ codeset, vte_trie_wide_encoding());
+ if (terminal->pvt->encoding != NULL) {
+ /* Keep the current encoding. */
+ return;
+ }
}
+ new_oconvw = g_iconv_open(codeset, vte_trie_wide_encoding());
if (new_oconvw == ((GIConv) -1)) {
g_warning(_("Unable to convert characters from %s to %s."),
- "WCHAR_T", codeset);
+ vte_trie_wide_encoding(), codeset);
g_iconv_close(new_iconv);
- return;
+ if (terminal->pvt->encoding != NULL) {
+ /* Keep the current encoding. */
+ return;
+ }
}
+ new_oconvu = g_iconv_open(codeset, "UTF-8");
if (new_oconvu == ((GIConv) -1)) {
g_warning(_("Unable to convert characters from %s to %s."),
"UTF-8", codeset);
g_iconv_close(new_iconv);
g_iconv_close(new_oconvw);
- return;
+ if (terminal->pvt->encoding != NULL) {
+ /* Keep the current encoding. */
+ return;
+ }
+ }
+
+ if (new_oconvu == ((GIConv) -1)) {
+ codeset = vte_trie_narrow_encoding();
+ new_iconv = g_iconv_open(vte_trie_wide_encoding(), codeset);
+ if (new_iconv == ((GIConv) -1)) {
+ g_error(_("Unable to convert characters from %s to %s."),
+ codeset, vte_trie_wide_encoding());
+ }
+ new_oconvw = g_iconv_open(codeset, vte_trie_wide_encoding());
+ if (new_oconvw == ((GIConv) -1)) {
+ g_error(_("Unable to convert characters from %s to %s."),
+ vte_trie_wide_encoding(), codeset);
+ }
+ new_oconvu = g_iconv_open(codeset, "UTF-8");
+ if (new_oconvu == ((GIConv) -1)) {
+ g_error(_("Unable to convert characters from %s to %s."),
+ "UTF-8", codeset);
+ }
}
- /* Set up the conversion for incoming-to-wchars. */
+ /* Set up the conversion for incoming-to-gunichar. */
if (terminal->pvt->incoming_conv != ((GIConv) -1)) {
g_iconv_close(terminal->pvt->incoming_conv);
}
terminal->pvt->incoming_conv = new_iconv;
- /* Set up the conversions for wchar/utf-8 to outgoing. */
+ /* Set up the conversions for gunichar/utf-8 to outgoing. */
if (terminal->pvt->outgoing_conv_wide != ((GIConv) -1)) {
g_iconv_close(terminal->pvt->outgoing_conv_wide);
}
memset(&cell, 0, sizeof(cell));
cell = screen->defaults;
cell.c = ' ';
- cell.columns = wcwidth(cell.c);
+ cell.columns = vte_unichar_width(cell.c);
if (!current) {
cell.fore = VTE_DEF_FG;
cell.back = VTE_DEF_BG;
outbufptr = g_value_dup_string(value);
} else
if (G_VALUE_HOLDS_POINTER(value)) {
- /* Convert the wide-character string into a
+ /* Convert the unicode-character string into a
* multibyte string. */
- conv = g_iconv_open("UTF-8", "WCHAR_T");
+ conv = g_iconv_open("UTF-8", vte_trie_wide_encoding());
inbuf = g_value_get_pointer(value);
- inbuf_len = wcslen((wchar_t*)inbuf) * sizeof(wchar_t);
+ inbuf_len = vte_unicode_strlen((gunichar*)inbuf) *
+ sizeof(gunichar);
outbuf_len = (inbuf_len * VTE_UTF8_BPC) + 1;
outbuf = outbufptr = g_malloc0(outbuf_len);
if (conv != ((GIConv) -1)) {
{
g_return_if_fail(VTE_IS_TERMINAL(terminal));
#ifdef VTE_DEFAULT_ISO_8859_1
- vte_terminal_set_encoding(terminal, "ISO-8859-1");
+ vte_terminal_set_encoding(terminal, vte_trie_narrow_encoding());
#else
if (strcmp(nl_langinfo(CODESET), "UTF-8") == 0) {
- vte_terminal_set_encoding(terminal, "ISO-8859-1");
+ vte_terminal_set_encoding(terminal, vte_trie_narrow_encoding());
} else {
vte_terminal_set_encoding(terminal, nl_langinfo(CODESET));
}
case '7':
case '=': /* Swiss. */
terminal->pvt->gxencoding[x] =
- "ISO-8859-15";
+ vte_trie_narrow_encoding();
break;
}
}
/* Insert a single character into the stored data array. */
static void
-vte_terminal_insert_char(GtkWidget *widget, wchar_t c, gboolean force_insert)
+vte_terminal_insert_char(GtkWidget *widget, gunichar c, gboolean force_insert)
{
VteTerminal *terminal;
GArray *array;
#ifdef VTE_DEBUG
if (vte_debug_on(VTE_DEBUG_IO)) {
- fprintf(stderr, "Inserting %ld (%d/%d), delta = %ld.\n",
+ fprintf(stderr, "Inserting %ld %c (%d/%d)(%d), delta = %ld, ",
(long)c,
+ c < 256 ? c : ' ',
screen->defaults.fore, screen->defaults.back,
+ vte_unichar_width(c),
(long)screen->insert_delta);
}
#endif
/* Figure out how many columns this character should occupy. */
- columns = wcwidth(c);
-
- /* FIXME: find why this can happen, and stop it. */
+ columns = vte_unichar_width(c);
if (columns < 0) {
- g_warning(_("Character %5ld is %d columns wide, guessing 1."),
- c, columns);
+ g_warning(_("Character 0x%x is undefined, allocating one "
+ "column."), c);
columns = 1;
}
#ifdef VTE_DEBUG
if (vte_debug_on(VTE_DEBUG_IO)) {
- fprintf(stderr, "Insertion delta = %ld.\n",
+ fprintf(stderr, "insertion delta => %ld.\n",
(long)screen->insert_delta);
}
#endif
int i;
long l;
const char *s;
- const wchar_t *w;
+ const gunichar *w;
GValue *value;
fprintf(stderr, "%s(", name);
if (params != NULL) {
}
/* Free a parameter array. Most of the GValue elements can clean up after
- * themselves, but we're using gpointers to hold wide character strings, and
+ * themselves, but we're using gpointers to hold unicode character strings, and
* we need to free those ourselves. */
static void
free_params_array(GValueArray *params)
}
}
-/* Process incoming data, first converting it to wide characters, and then
+/* Process incoming data, first converting it to unicode characters, and then
* processing escape sequences. */
static gboolean
vte_terminal_process_incoming(gpointer data)
GdkRectangle rect;
char *ibuf, *obuf, *obufptr, *ubuf, *ubufptr;
size_t icount, ocount, ucount;
- wchar_t *wbuf, c;
+ gunichar *wbuf, c;
int wcount, start;
const char *match, *encoding;
GIConv unconv;
GQuark quark;
- const wchar_t *next;
+ const gunichar *next;
gboolean leftovers, modified, again, bottom;
g_return_val_if_fail(GTK_IS_WIDGET(data), FALSE);
/* We should only be called when there's data to process. */
g_assert(terminal->pvt->n_incoming > 0);
- /* Try to convert the data into wide characters. */
- ocount = sizeof(wchar_t) * terminal->pvt->n_incoming;
+ /* Try to convert the data into unicode characters. */
+ ocount = sizeof(gunichar) * terminal->pvt->n_incoming;
obuf = obufptr = g_malloc(ocount);
icount = terminal->pvt->n_incoming;
ibuf = terminal->pvt->incoming;
- /* Convert the data to wide characters. */
+ /* Convert the data to unicode characters. */
if (g_iconv(terminal->pvt->incoming_conv, &ibuf, &icount,
&obuf, &ocount) == -1) {
/* No dice. Try again when we have more data. */
if ((errno == EILSEQ) && (terminal->pvt->n_incoming > 0)) {
/* Discard the offending byte. */
start = terminal->pvt->n_incoming - icount;
- terminal->pvt->incoming[start] = '?';
#ifdef VTE_DEBUG
- if (vte_debug_on(VTE_DEBUG_IO)) {
+ if (vte_debug_on(VTE_DEBUG_IO) || 1) {
fprintf(stderr, "Error converting %ld incoming "
"data bytes: %s, discarding byte %ld "
- "and trying again.\n",
+ "(0x%02x) and trying again.\n",
(long) terminal->pvt->n_incoming,
- strerror(errno), start);
+ strerror(errno), start,
+ terminal->pvt->incoming[start]);
}
#endif
+ terminal->pvt->incoming[start] = '?';
/* Try again. */
g_free(obufptr);
return TRUE;
/* Store the current encoding. */
encoding = terminal->pvt->encoding;
- /* Compute the number of wide characters we got. */
- wcount = (obuf - obufptr) / sizeof(wchar_t);
- wbuf = (wchar_t*) obufptr;
+ /* Compute the number of unicode characters we got. */
+ wcount = (obuf - obufptr) / sizeof(gunichar);
+ wbuf = (gunichar*) obufptr;
/* Save the current cursor position. */
screen = terminal->pvt->screen;
match,
quark,
params);
- /* Skip over the proper number of wide chars. */
+ /* Skip over the proper number of unicode chars. */
start = (next - wbuf);
/* Check if the encoding's changed. If it has, we need
* to force our caller to call us again to parse the
if (leftovers) {
/* There are leftovers, so convert them back to the terminal's
* old encoding and save them for later. */
- unconv = g_iconv_open(encoding, "WCHAR_T");
+ unconv = g_iconv_open(encoding, vte_trie_wide_encoding());
if (unconv != ((GIConv) -1)) {
- icount = sizeof(wchar_t) * (wcount - start);
+ icount = sizeof(gunichar) * (wcount - start);
ibuf = (char*) &wbuf[start];
ucount = VTE_UTF8_BPC * (wcount - start) + 1;
ubuf = ubufptr = g_malloc(ucount);
if (vte_debug_on(VTE_DEBUG_IO)) {
fprintf(stderr, "Error unconverting %ld "
"pending input bytes (%s), dropping.\n",
- (long) (sizeof(wchar_t) * (wcount - start)),
+ (long) (sizeof(gunichar) * (wcount - start)),
strerror(errno));
}
#endif
}
}
-/* Send wide characters to the child. */
+/* Send unicode characters to the child. */
static gboolean
vte_terminal_io_write(GIOChannel *channel,
GdkInputCondition condition,
g_return_if_fail(VTE_IS_TERMINAL(terminal));
g_assert((strcmp(encoding, "UTF-8") == 0) ||
- (strcmp(encoding, "WCHAR_T") == 0));
+ (strcmp(encoding, vte_trie_wide_encoding()) == 0));
conv = NULL;
if (strcmp(encoding, "UTF-8") == 0) {
conv = &terminal->pvt->outgoing_conv_utf8;
}
- if (strcmp(encoding, "WCHAR_T") == 0) {
+ if (strcmp(encoding, vte_trie_wide_encoding()) == 0) {
conv = &terminal->pvt->outgoing_conv_wide;
}
g_assert(conv != NULL);
if (terminal->pvt->word_chars == NULL) {
return FALSE;
}
- /* FIXME: if a gunichar isn't a wchar_t, we're probably screwed, so
- * should we convert from UCS-4 to WCHAR_T or something here? (Is a
- * gunichar even a UCS-4 character)? Or should we convert to UTF-8
- * and then to WCHAR_T? Aaaargh. */
for (i = 0; i < terminal->pvt->word_chars->len; i++) {
range = &g_array_index(terminal->pvt->word_chars,
VteWordCharRange,
int fore, back, dcol, i, j, padding;
long xcenter, ycenter, xright, ybottom;
char utf8_buf[7] = {0,};
- wchar_t ch;
+ gunichar ch;
gboolean drawn, reverse, alternate;
PangoAttribute *attr;
PangoAttrList *attrlist;
XftTextExtents32(GDK_DISPLAY(), font, &ftc, 1,
&glyph_info);
padding = CLAMP((terminal->char_width *
- wcwidth(ch) - glyph_info.xOff) / 2,
+ (g_unichar_iswide(ch) ? 2 : 1) -
+ glyph_info.xOff) / 2,
0, 3 * terminal->char_width);
+#ifdef VTE_DEBUG
+ if (vte_debug_on(VTE_DEBUG_UPDATES)) {
+ fprintf(stderr, "Using %d pixels of padding for"
+ " character 0x%x.\n", padding, ch);
+ }
+#endif
g_tree_insert(terminal->pvt->fontpadding,
GINT_TO_POINTER(ftc),
GINT_TO_POINTER(padding));
if (!drawn) {
gpointer ptr;
XRectangle ink, logic;
+ wchar_t wc;
+ wc = vte_wc_from_unichar(ch);
ptr = g_tree_lookup(terminal->pvt->fontpadding,
GINT_TO_POINTER(ch));
padding = GPOINTER_TO_INT(ptr);
padding = 0;
} else if (padding == 0) {
XwcTextExtents(terminal->pvt->fontset,
- &ch, 1, &ink, &logic);
+ &wc, 1, &ink, &logic);
padding = CLAMP((terminal->char_width *
wcwidth(ch) - logic.width) / 2,
0, 3 * terminal->char_width);
}
/* Set the textitem's fields. */
- textitem.chars = &ch;
+ textitem.chars = &wc;
textitem.nchars = 1;
textitem.delta = 0;
textitem.font_set = terminal->pvt->fontset;
if (terminal->pvt->im_preedit != NULL) {
preedit = terminal->pvt->im_preedit;
for (i = 0; i < terminal->pvt->im_preedit_cursor; i++) {
- col += wcwidth(g_utf8_get_char(preedit));
+ col += vte_unichar_width(g_utf8_get_char(preedit));
preedit = g_utf8_next_char(preedit);
}
}
ftdraw,
#endif
FALSE);
- col += wcwidth(im_cell.c);
+ col += vte_unichar_width(im_cell.c);
preedit = g_utf8_next_char(preedit);
}
if (len > 0) {
_vte_marshal_VOID__UINT_UINT,
G_TYPE_NONE, 2, G_TYPE_UINT, G_TYPE_UINT);
+ /* Try to determine some acceptable encoding names. */
+ if (vte_trie_narrow_encoding() == NULL) {
+ g_error("Don't know how to read ISO-8859-1 data!");
+ }
+ if (vte_trie_wide_encoding() == NULL) {
+ g_error("Don't know how to read native-endian unicode data!");
+ }
+
#ifdef VTE_DEBUG
/* Turn on debugging if we were asked to. */
if (getenv("VTE_DEBUG_FLAGS") != NULL) {
vte_terminal_set_word_chars(VteTerminal *terminal, const char *spec)
{
GIConv conv;
- wchar_t *wbuf;
+ gunichar *wbuf;
char *ibuf, *ibufptr, *obuf, *obufptr;
size_t ilen, olen;
VteWordCharRange range;
}
terminal->pvt->word_chars = g_array_new(FALSE, TRUE,
sizeof(VteWordCharRange));
- /* Convert the spec from UTF-8 to a string of wchar_t. */
- conv = g_iconv_open("WCHAR_T", "UTF-8");
+ /* Convert the spec from UTF-8 to a string of gunichars. */
+ conv = g_iconv_open(vte_trie_wide_encoding(), "UTF-8");
if (conv == ((GIConv) -1)) {
/* Aaargh. We're screwed. */
g_warning(_("g_iconv_open() failed setting word characters"));
}
ilen = strlen(spec);
ibuf = ibufptr = g_strdup(spec);
- olen = (ilen + 1) * sizeof(wchar_t);
- obuf = obufptr = g_malloc0(sizeof(wchar_t) * (strlen(spec) + 1));
- wbuf = (wchar_t*) obuf;
+ olen = (ilen + 1) * sizeof(gunichar);
+ obuf = obufptr = g_malloc0(sizeof(gunichar) * (strlen(spec) + 1));
+ wbuf = (gunichar*) obuf;
wbuf[ilen] = '\0';
g_iconv(conv, &ibuf, &ilen, &obuf, &olen);
g_iconv_close(conv);
- for (i = 0; i < ((obuf - obufptr) / sizeof(wchar_t)); i++) {
+ for (i = 0; i < ((obuf - obufptr) / sizeof(gunichar)); i++) {
/* The hyphen character. */
if (wbuf[i] == '-') {
range.start = wbuf[i];