Initial revision

parents
This diff is collapsed.
/*
Copyright (c) 2001 by Juliusz Chroboczek
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/* $XFree86: xc/programs/luit/charset.h,v 1.4 2002/10/17 01:06:09 dawes Exp $ */
/* Charset type codes; these are the values compared against the `type'
   member of a charset (see CHARSET_REGULAR below).
   NOTE(review): the names suggest 94-, 96- and 128-code single-byte sets
   and 94x94 / 96x96 double-byte sets -- confirm against the charset
   implementation. */
#define T_FAILED 0
#define T_94 1
#define T_96 2
#define T_128 3
#define T_9494 4
#define T_9696 5
/* Big 5 */
#define T_94192 6
#define T_OTHER 7
/* True for charsets that pass control chars unchanged, at least in
the first byte */
/* Evaluates to non-zero for any charset whose type is not T_128; `c'
   must be a pointer to a structure that has a `type' member. */
#define CHARSET_REGULAR(c) ((c)->type != T_128)
/* Description of one character set together with its conversion hooks. */
typedef struct _Charset {
/* Name of the charset. */
char *name;
/* One of the T_* type codes. */
int type;
/* NOTE(review): presumably the ISO 2022 final byte that designates this
   charset -- confirm against getCharset(). */
unsigned char final;
/* Conversion callbacks; each receives a code and the charset itself.
   NOTE(review): the direction of each conversion (charset <-> Unicode)
   is not visible in this header -- confirm in the implementation. */
unsigned int (*recode)(unsigned int, struct _Charset *self);
int (*reverse)(unsigned int, struct _Charset *self);
/* Opaque per-charset data. */
void *data;
/* Hooks taking an OtherState auxiliary argument.  Their signatures match
   the stack_*, mapping_* and reverse_* functions of other.c; presumably
   only meaningful for T_OTHER charsets -- TODO confirm. */
int (*other_stack)(unsigned char c, OtherStatePtr aux);
OtherState *other_aux;
unsigned int (*other_recode)(unsigned int c, OtherStatePtr aux);
unsigned int (*other_reverse)(unsigned int c, OtherStatePtr aux);
/* Link to another charset record. */
struct _Charset *next;
} CharsetRec, *CharsetPtr;
/* Per-locale charset setup record.
   NOTE(review): the fields mirror the gl/gr/g0..g3/other results of
   getLocaleState(); presumably gl and gr are indices into G0..G3 and the
   strings are charset names -- confirm against the locale table. */
typedef struct _LocaleCharset {
char *name;
int gl;
int gr;
char *g0;
char *g1;
char *g2;
char *g3;
char *other;
} LocaleCharsetRec, *LocaleCharsetPtr;
/* Charset lookup entry points.
   NOTE(review): parameter meanings are not visible in this header; the
   unsigned char / int pair of getCharset() presumably is a final byte
   and a T_* type -- confirm against the definitions. */
CharsetPtr getUnknownCharset(int);
CharsetPtr getCharset(unsigned char, int);
CharsetPtr getCharsetByName(char*);
void reportCharsets(void);
/* Takes a locale and a charset string and reports, through the *_return
   pointers, the GL/GR assignment and the charsets for G0..G3 plus the
   "other" charset.  NOTE(review): meaning of the int result is not
   visible here -- confirm. */
int getLocaleState(char *locale, char *charset,
int *gl_return, int *gr_return,
CharsetPtr *g0_return, CharsetPtr *g1_return,
CharsetPtr *g2_return, CharsetPtr *g3_return,
CharsetPtr *other_return);
This diff is collapsed.
/*
Copyright (c) 2001 by Juliusz Chroboczek
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/* $XFree86: xc/programs/luit/iso2022.h,v 1.5 2002/10/17 01:06:09 dawes Exp $ */
/* Control codes.  The names without a suffix are single-byte values;
   NOTE(review): the *_7 names presumably are the byte that follows ESC
   in the equivalent seven-bit escape sequence -- confirm against the
   parser. */
#define ESC 0x1B
#define CSI 0x9B
#define CSI_7 '['
#define SS2 0x8E
#define SS2_7 0x4E
#define SS3 0x8F
#define SS3_7 0x4F
#define LS0 0x0F
#define LS1 0x0E
#define LS2_7 0x6E
#define LS3_7 0x6F
#define LS1R_7 0x7E
#define LS2R_7 0x7D
#define LS3R_7 0x7C
/* Non-zero when the high nibble of x is not 0x2. */
#define IS_FINAL_ESC(x) (((x) & 0xF0 ) != 0x20)
/* Non-zero when the high nibble of x is neither 0x2 nor 0x3. */
#define IS_FINAL_CSI(x) (((x) & 0xF0 ) != 0x20 && (((x) & 0xF0 ) != 0x30))
/* NOTE(review): presumably the values of Iso2022Rec.parserState. */
#define P_NORMAL 0
#define P_ESC 1
#define P_CSI 2
/* NOTE(review): presumably the values of Iso2022Rec.shiftState. */
#define S_NORMAL 0
#define S_SS2 1
#define S_SS3 2
/* Bit flags (distinct powers of two).
   NOTE(review): presumably combined into Iso2022Rec.inputFlags. */
#define IF_SS 1
#define IF_LS 2
#define IF_EIGHTBIT 4
#define IF_SSGR 8
/* Bit flags (distinct powers of two).
   NOTE(review): presumably combined into Iso2022Rec.outputFlags. */
#define OF_SS 1
#define OF_LS 2
#define OF_SELECT 4
#define OF_PASSTHRU 8
/* State of one ISO 2022 conversion stream. */
typedef struct _Iso2022 {
/* Pointers to the charsets currently used as GL and GR; dereferenced
   by the GL() and GR() macros. */
CharsetPtr *glp, *grp;
/* The four designated charsets, accessed through G0()..G3(). */
CharsetPtr g[4];
/* Accessed through OTHER(). */
CharsetPtr other;
/* NOTE(review): presumably one of the P_* and S_* values respectively. */
int parserState;
int shiftState;
/* NOTE(review): presumably combinations of the IF_* and OF_* flags. */
int inputFlags;
int outputFlags;
/* NOTE(review): buffer bookkeeping; the exact roles of buffered_len,
   buffered_count and buffered_ku are not visible in this header --
   confirm against the implementation. */
unsigned char *buffered;
int buffered_len;
int buffered_count;
int buffered_ku;
unsigned char *outbuf;
int outbuf_count;
} Iso2022Rec, *Iso2022Ptr;
/* Accessors for an Iso2022Ptr `i'.  GL/GR dereference the glp/grp
   pointers; G0..G3 select the entries of the g[] array; OTHER selects
   the `other' member.  All expand to lvalues. */
#define GL(i) (*(i)->glp)
#define GR(i) (*(i)->grp)
#define G0(i) ((i)->g[0])
#define G1(i) ((i)->g[1])
#define G2(i) ((i)->g[2])
#define G3(i) ((i)->g[3])
#define OTHER(i) ((i)->other)
/* NOTE(review): presumably the size in bytes of the conversion buffers
   -- confirm where it is used. */
#define BUFFER_SIZE 512
/* Lifetime management and setup of an Iso2022 state record.
   NOTE(review): argument meanings and return conventions are not
   visible in this header -- confirm against the definitions. */
Iso2022Ptr allocIso2022(void);
void destroyIso2022(Iso2022Ptr);
int initIso2022(char *, char *, Iso2022Ptr);
int mergeIso2022(Iso2022Ptr, Iso2022Ptr);
void reportIso2022(Iso2022Ptr);
/* Stream processing entry points; each takes the state record and an
   int, and all but terminate() also take a byte buffer and an int.
   NOTE(review): presumably a file descriptor, a data pointer and a byte
   count -- confirm. */
void terminate(Iso2022Ptr, int);
void terminateEsc(Iso2022Ptr, int, unsigned char*, int);
void copyIn(Iso2022Ptr, int, unsigned char*, int);
void copyOut(Iso2022Ptr, int, unsigned char*, int);
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "locale.h"
This diff is collapsed.
/*
Copyright (c) 2001 by Juliusz Chroboczek
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/* Program-wide settings, defined in another translation unit.
   NOTE(review): presumably set from the command-line options of the
   same names (e.g. verbose, ilog, olog) -- confirm where they are
   defined. */
extern int iso2022;
extern int verbose;
extern int sevenbit;
extern int ilog;
extern int olog;
/* NOTE(review): presumably the two sides of the process after forking;
   argument meanings are not visible here -- confirm. */
void child(char*, char*, char**);
void parent(int, int);
.\" $XFree86: xc/programs/luit/luit.man,v 1.7 2003/02/24 01:10:25 dawes Exp $
.TH LUIT 1 __vendorversion__
.SH NAME
luit \- Locale and ISO\ 2022 support for Unicode terminals
.SH SYNOPSIS
.B luit
[
.I options
] [
.B \-\-
] [
.I program
[
.I args
] ]
.SH DESCRIPTION
.B Luit
is a filter that can be run between an arbitrary application and a
UTF-8 terminal emulator. It will convert application output from the
locale's encoding into UTF-8, and convert terminal input from UTF-8
into the locale's encoding.
An application may also request switching to a different output
encoding using ISO\ 2022 and ISO\ 6429 escape sequences. Use of this
feature is discouraged: multilingual applications should be modified
to directly generate UTF-8 instead.
.B Luit
is usually invoked transparently by the terminal emulator. For
information about running
.B luit
from the command line, see EXAMPLES below.
.SH OPTIONS
.TP
.B \-h
Display some summary help and quit.
.TP
.B \-list
List the supported charsets and encodings, then quit.
.TP
.B \-v
Be verbose.
.TP
.B \-c
Function as a simple converter from standard input to standard output.
.TP
.B \-x
Exit as soon as the child dies. This may cause
.B luit
to lose data at the end of the child's output.
.TP
.BI \-argv0 " name"
Set the child's name (as passed in argv[0]).
.TP
.BI \-encoding " encoding"
Set up
.B luit
to use
.I encoding
rather than the current locale's encoding.
.TP
.B +oss
Disable interpretation of single shifts in application output.
.TP
.B +ols
Disable interpretation of locking shifts in application output.
.TP
.B +osl
Disable interpretation of character set selection sequences in
application output.
.TP
.B +ot
Disable interpretation of all sequences and pass all sequences in
application output to the terminal unchanged. This may lead to
interesting results.
.TP
.B \-k7
Generate seven-bit characters for keyboard input.
.TP
.B +kss
Disable generation of single-shifts for keyboard input.
.TP
.B +kssgr
Use GL codes after a single shift for keyboard input. By default, GR
codes are generated after a single shift when generating eight-bit
keyboard input.
.TP
.B \-kls
Generate locking shifts (SO/SI) for keyboard input.
.TP
.BI \-gl " gn"
Set the initial assignment of GL. The argument should be one of
.BR g0 ,
.BR g1 ,
.B g2
or
.BR g3 .
The default depends on the locale, but is usually
.BR g0 .
.TP
.BI \-gr " gk"
Set the initial assignment of GR. The default depends on the locale,
and is usually
.B g2
except for EUC locales, where it is
.BR g1 .
.TP
.BI \-g0 " charset"
Set the charset initially selected in G0. The default depends on
the locale, but is usually
.BR ASCII .
.TP
.BI \-g1 " charset"
Set the charset initially selected in G1. The default depends on the
locale.
.TP
.BI \-g2 " charset"
Set the charset initially selected in G2. The default depends on the
locale.
.TP
.BI \-g3 " charset"
Set the charset initially selected in G3. The default depends on the
locale.
.TP
.BI \-ilog " filename"
Log into
.I filename
all the bytes received from the child.
.TP
.BI \-olog " filename"
Log into
.I filename
all the bytes sent to the terminal emulator.
.TP
.B \-\-
End of options.
.SH EXAMPLES
The most typical use of
.B luit
is to adapt an instance of
.B XTerm
to the locale's encoding. Current versions of
.B XTerm
invoke
.B luit
automatically when it is needed. If you are using an older release of
.BR XTerm ,
or a different terminal emulator, you may invoke
.B luit
manually:
.IP
$ xterm \-u8 \-e luit
.PP
If you are running in a UTF-8 locale but need to access a remote
machine that doesn't support UTF-8,
.B luit
can adapt the remote output to your terminal:
.IP
$ LC_ALL=fr_FR luit ssh legacy-machine
.PP
.B Luit
is also useful with applications that hard-wire an encoding that is
different from the one normally used on the system or want to use
legacy escape sequences for multilingual output. In particular,
versions of
.B Emacs
that do not speak UTF-8 well can use
.B luit
for multilingual output:
.IP
$ luit \-encoding 'ISO 8859-1' emacs \-nw
.PP
And then, in
.BR Emacs ,
.IP
M-x set-terminal-coding-system RET iso-2022-8bit-ss2 RET
.PP
.SH FILES
.TP
.B __projectroot__/lib/X11/fonts/encodings/encodings.dir
The system-wide encodings directory.
.TP
.B __projectroot__/lib/X11/locale/locale.alias
The file mapping locales to locale encodings.
.SH SECURITY
On systems with SVR4 (``Unix-98'') ptys (Linux version 2.2 and later,
SVR4),
.B luit
should be run as the invoking user.
On systems without SVR4 (``Unix-98'') ptys (notably BSD variants),
running
.B luit
as an ordinary user will leave the tty world-writable; this is a
security hole, and luit will generate a warning (but will still
run). A possible solution is to make
.B luit
suid root;
.B luit
should drop privileges sufficiently early to make this safe. However,
the startup code has not been exhaustively audited, and the author
takes no responsibility for any resulting security issues.
.B Luit
will refuse to run if it is installed setuid and the underlying system
does not have POSIX saved ids.
.SH BUGS
None of this complexity should be necessary. Stateless UTF-8
throughout the system is the way to go.
Charsets with a non-trivial intermediary byte are not yet supported.
Selecting alternate sets of control characters is not supported and
will never be.
.SH SEE ALSO
xterm(1), unicode(7), utf-8(7), charsets(7).
.I Character Code Structure and Extension Techniques (ISO\ 2022, ECMA-35).
.I Control Functions for Coded Character Sets (ISO\ 6429, ECMA-48).
.SH AUTHOR
Luit was written by Juliusz Chroboczek <jch@xfree86.org> for the
XFree86 project.
/*
Copyright (c) 2002 by Tomohiro KUBOTA
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/* $XFree86: xc/programs/luit/other.c,v 1.1 2002/10/17 01:06:09 dawes Exp $ */
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <X11/fonts/fontenc.h>
#include "other.h"
#include "charset.h"
#ifndef NULL
#define NULL 0
#endif
/* Code point U+20AC; the GBK routines map it to and from the single
   byte value 128. */
#define EURO_10646 0x20AC
/*
 * Set up the GBK state: look up the "gbk-0" to Unicode mapping, build
 * its reverse map, and mark the lead-byte buffer as empty (-1).
 * Returns 1 on success, 0 if either lookup fails.
 */
int
init_gbk(OtherStatePtr s)
{
    s->gbk.mapping =
        FontEncMapFind("gbk-0", FONT_ENCODING_UNICODE, -1, -1, NULL);
    if(s->gbk.mapping == NULL)
        return 0;

    s->gbk.reverse = FontMapReverse(s->gbk.mapping);
    if(s->gbk.reverse == NULL)
        return 0;

    /* No lead byte pending. */
    s->gbk.buf = -1;
    return 1;
}
/*
 * Convert a GBK code to its mapped value.  Codes below 128 are returned
 * unchanged, 128 yields EURO_10646, and everything else is recoded
 * through the GBK mapping held in the state.
 */
unsigned int
mapping_gbk(unsigned int n, OtherStatePtr s)
{
    if(n <= 128)
        return (n == 128) ? EURO_10646 : n;

    return FontEncRecode(n, s->gbk.mapping);
}
/*
 * Inverse of mapping_gbk(): values below 128 are returned unchanged,
 * EURO_10646 yields 128, and everything else goes through the reverse
 * map held in the state.
 */
unsigned int
reverse_gbk(unsigned int n, OtherStatePtr s)
{
    if(n < 128)
        return n;

    if(n == EURO_10646)
        return 128;

    return s->gbk.reverse->reverse(n, s->gbk.reverse->data);
}
/*
 * Feed one input byte to the GBK byte assembler.
 *
 * With no lead byte pending, bytes below 129 are returned as-is and any
 * other byte is remembered as a lead byte (result -1).  With a lead
 * byte pending, the pending state is always cleared: a byte below 0x40
 * or equal to 0x7F is returned on its own, a pair in which either byte
 * is 0xFF yields -1, and otherwise the two-byte code
 * (lead << 8) + c is returned.
 */
int
stack_gbk(unsigned char c, OtherStatePtr s)
{
    int lead = s->gbk.buf;

    if(lead < 0) {
        if(c >= 129) {
            s->gbk.buf = c;
            return -1;
        }
        return c;
    }

    /* Second byte: whatever happens, the lead byte is consumed. */
    s->gbk.buf = -1;

    if(c < 0x40 || c == 0x7F)
        return c;

    if(lead >= 0xFF || c >= 0xFF)
        return -1;

    return (lead << 8) + c;
}
/*
 * Reset the UTF-8 decoder state so that no partial sequence is pending.
 * Always returns 1.
 */
int
init_utf8(OtherStatePtr s)
{
    s->utf8.buf_ptr = 0;    /* nothing buffered */

    return 1;
}
/*
 * Identity mapping: the value produced by stack_utf8() is returned
 * unchanged.  The state argument is not used.
 */
unsigned int
mapping_utf8(unsigned int n, OtherStatePtr s)
{
    (void)s;

    return n;
}
/*
 * Encode the code point n as UTF-8 and return the bytes packed into one
 * unsigned int, first byte in the most significant position.  Values
 * below 0x80 are returned as-is; two-, three- and four-byte forms are
 * used below 0x800, below 0x10000 and above respectively.  Only the low
 * 21 bits of n take part in the four-byte form.  The state argument is
 * not used.
 */
unsigned int
reverse_utf8(unsigned int n, OtherStatePtr s)
{
    unsigned int b0, b1, b2, last;

    (void)s;

    if(n < 0x80)
        return n;

    /* Trailing byte, common to all multi-byte forms. */
    last = 0x80 | (n & 0x3F);

    if(n < 0x800) {
        b0 = 0xC0 | (n >> 6);
        return (b0 << 8) | last;
    }

    b1 = 0x80 | ((n >> 6) & 0x3F);

    if(n < 0x10000) {
        b0 = 0xE0 | (n >> 12);
        return (b0 << 16) | (b1 << 8) | last;
    }

    b2 = 0x80 | ((n >> 12) & 0x3F);
    b0 = 0xF0 | ((n >> 18) & 0x07);
    return (b0 << 24) | (b2 << 16) | (b1 << 8) | last;
}
/*
 * Feed one input byte to the UTF-8 decoder.
 *
 * Returns the decoded code point as soon as a sequence is complete.
 * Bytes below 0x80 are returned immediately and discard any partial
 * sequence.  Returns -1 when more bytes are needed, or when the input
 * is rejected: a stray continuation byte, a lead byte in the middle of
 * a sequence, a lead byte of an unsupported (longer than four bytes)
 * form, an overlong encoding, or a value above U+10FFFF.
 */
int
stack_utf8(unsigned char c, OtherStatePtr s)
{
    int u;

    if(c < 0x80) {
        s->utf8.buf_ptr = 0;
        return c;
    }

    if(s->utf8.buf_ptr == 0) {
        /* Expecting a lead byte; 10xxxxxx here is a stray continuation. */
        if((c & 0x40) == 0)
            return -1;
        s->utf8.buf[s->utf8.buf_ptr++] = c;
        if((c & 0x60) == 0x40)
            s->utf8.len = 2;            /* 110xxxxx */
        else if((c & 0x70) == 0x60)
            s->utf8.len = 3;            /* 1110xxxx */
        else if((c & 0x78) == 0x70)
            s->utf8.len = 4;            /* 11110xxx */
        else
            s->utf8.buf_ptr = 0;        /* 11111xxx: not supported */
        return -1;
    }

    /* Inside a sequence only continuation bytes are acceptable. */
    if((c & 0x40) != 0) {
        s->utf8.buf_ptr = 0;
        return -1;
    }

    s->utf8.buf[s->utf8.buf_ptr++] = c;
    if(s->utf8.buf_ptr < s->utf8.len)
        return -1;

    /* Sequence complete: the buffer is consumed whatever the outcome. */
    s->utf8.buf_ptr = 0;

    switch(s->utf8.len) {
    case 2:
        u = ((s->utf8.buf[0] & 0x1F) << 6)
            | (s->utf8.buf[1] & 0x3F);
        return (u < 0x80) ? -1 : u;
    case 3:
        u = ((s->utf8.buf[0] & 0x0F) << 12)
            | ((s->utf8.buf[1] & 0x3F) << 6)
            | (s->utf8.buf[2] & 0x3F);
        return (u < 0x800) ? -1 : u;
    case 4:
        /* A four-byte lead byte carries three payload bits (mask 0x07);
           masking with 0x03 would lose bit 20 of the code point. */
        u = ((s->utf8.buf[0] & 0x07) << 18)
            | ((s->utf8.buf[1] & 0x3F) << 12)
            | ((s->utf8.buf[2] & 0x3F) << 6)
            | (s->utf8.buf[3] & 0x3F);
        if(u < 0x10000 || u > 0x10FFFF)
            return -1;
        return u;
    default:
        return -1;
    }
}
/* Constants used by the Shift-JIS routines.  The *_SJIS values are
   single-byte input codes and the *_10646 values are the code points
   they are exchanged with: 0x5C <-> U+00A5 and 0x7E <-> U+203E.
   Code points at or above HALFWIDTH_10646 are looked up in the
   JIS X 0201 reverse map by reverse_sjis(). */
#define HALFWIDTH_10646 0xFF61
#define YEN_SJIS 0x5C
#define YEN_10646 0x00A5
#define OVERLINE_SJIS 0x7E
#define OVERLINE_10646 0x203E
/*
 * Set up the Shift-JIS state: look up the "jisx0208.1990-0" and
 * "jisx0201.1976-0" to Unicode mappings, build a reverse map for each,
 * and mark the lead-byte buffer as empty (-1).
 * Returns 1 on success, 0 as soon as any lookup fails.
 */
int
init_sjis(OtherStatePtr s)
{
    /* JIS X 0208: forward map first, then its reverse. */
    s->sjis.x0208mapping =
        FontEncMapFind("jisx0208.1990-0", FONT_ENCODING_UNICODE, -1, -1, NULL);
    if(s->sjis.x0208mapping == NULL)
        return 0;

    s->sjis.x0208reverse = FontMapReverse(s->sjis.x0208mapping);
    if(s->sjis.x0208reverse == NULL)
        return 0;

    /* JIS X 0201: same two steps. */
    s->sjis.x0201mapping =
        FontEncMapFind("jisx0201.1976-0", FONT_ENCODING_UNICODE, -1, -1, NULL);
    if(s->sjis.x0201mapping == NULL)
        return 0;

    s->sjis.x0201reverse = FontMapReverse(s->sjis.x0201mapping);
    if(s->sjis.x0201reverse == NULL)
        return 0;

    /* No lead byte pending. */
    s->sjis.buf = -1;
    return 1;
}
/*
 * Convert a Shift-JIS code to its mapped value.
 *
 * YEN_SJIS and OVERLINE_SJIS map to their dedicated code points, other
 * values below 0x80 are returned unchanged, and 0xA0..0xDF is recoded
 * through the JIS X 0201 mapping.  Anything else is treated as a
 * two-byte code: its bytes are converted to a JIS X 0208 row/column
 * pair, which is recoded through the JIS X 0208 mapping.
 */
unsigned int
mapping_sjis(unsigned int n, OtherStatePtr s)
{
    unsigned int lead, trail, row, col;

    switch(n) {
    case YEN_SJIS:
        return YEN_10646;
    case OVERLINE_SJIS:
        return OVERLINE_10646;
    default:
        break;
    }

    if(n < 0x80)
        return n;

    if(n >= 0xA0 && n <= 0xDF)
        return FontEncRecode(n, s->sjis.x0201mapping);

    lead = (n >> 8) & 0xFF;
    trail = n & 0xFF;

    /* Each lead byte covers two rows; the trail byte picks which one. */
    row = (lead << 1) - ((lead <= 0x9F) ? 0xE0 : 0x160);
    col = trail - 0x1F;
    if(trail >= 0x7F)
        col -= 1;               /* 0x7F is skipped in the trail range */
    if(trail >= 0x9F)
        col -= 0x5E;            /* second row of the pair */
    else
        row -= 1;               /* first row of the pair */

    return FontEncRecode((row << 8) + col, s->sjis.x0208mapping);
}
/*
 * Inverse of mapping_sjis().
 *
 * YEN_10646 and OVERLINE_10646 map to their single-byte codes, other
 * values below 0x80 are returned unchanged, and values at or above
 * HALFWIDTH_10646 go through the JIS X 0201 reverse map.  Everything
 * else is looked up in the JIS X 0208 reverse map and the resulting
 * row/column pair is packed into a two-byte Shift-JIS code.
 * NOTE(review): the result of a failed JIS X 0208 lookup is converted
 * like any other value -- confirm what the reverse map returns for
 * unmapped input.
 */
unsigned int
reverse_sjis(unsigned int n, OtherStatePtr s)
{
    unsigned int jis, row, col, lead, trail;

    if(n == YEN_10646)
        return YEN_SJIS;
    if(n == OVERLINE_10646)
        return OVERLINE_SJIS;
    if(n < 0x80)
        return n;
    if(n >= HALFWIDTH_10646)
        return s->sjis.x0201reverse->reverse(n, s->sjis.x0201reverse->data);

    jis = s->sjis.x0208reverse->reverse(n, s->sjis.x0208reverse->data);
    row = (jis >> 8) & 0xFF;
    col = jis & 0xFF;

    /* Two rows share one lead byte. */
    lead = (row - 1) >> 1;
    lead += (row <= 0x5E) ? 0x71 : 0xB1;

    /* Odd rows use the lower trail range, even rows the upper one. */
    if(row & 1)
        trail = col + ((col < 0x60) ? 0x1F : 0x20);
    else
        trail = col + 0x7E;

    return (lead << 8) + trail;
}
/*
 * Feed one input byte to the Shift-JIS byte assembler.
 *
 * With no lead byte pending, bytes below 128 and bytes in 0xA0..0xDF
 * are returned as-is; any other byte is remembered as a lead byte
 * (result -1).  With a lead byte pending, the pending state is always
 * cleared: a byte below 0x40 or equal to 0x7F is returned on its own,
 * a pair in which either byte is 0xFF yields -1, and otherwise the
 * two-byte code (lead << 8) + c is returned.
 */
int
stack_sjis(unsigned char c, OtherStatePtr s)
{
    int lead = s->sjis.buf;

    if(lead < 0) {
        if(c < 128 || (c >= 0xA0 && c <= 0xDF))
            return c;
        s->sjis.buf = c;
        return -1;
    }

    /* Second byte: whatever happens, the lead byte is consumed. */
    s->sjis.buf = -1;

    if(c < 0x40 || c == 0x7F)
        return c;

    if(lead >= 0xFF || c >= 0xFF)
        return -1;

    return (lead << 8) + c;
}
/*
Copyright (c) 2002 by Tomohiro KUBOTA
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/* $XFree86: xc/programs/luit/other.h,v 1.1 2002/10/17 01:06:09 dawes Exp $ */
/* State used by the GBK routines (init_gbk, mapping_gbk, reverse_gbk,
   stack_gbk). */
typedef struct {
/* "gbk-0" to Unicode mapping, used for recoding. */
FontMapPtr mapping;
/* Reverse map built from `mapping'. */
FontMapReversePtr reverse;
/* Pending lead byte of a two-byte code, or -1 when none is pending. */
int buf;
} aux_gbk;
/* State used by the UTF-8 decoder (init_utf8, stack_utf8). */
typedef struct {
/* Bytes of the sequence collected so far. */
unsigned char buf[4];
/* buf_ptr: number of bytes stored in buf (0 when no sequence is in
   progress); len: total length of the current sequence as announced
   by its lead byte. */
int buf_ptr, len;
} aux_utf8;
typedef struct {
FontMapPtr x0208mapping;