Logo Search packages:      
Sourcecode: w3m version File versions  Download package

sjis.c

#include "wc.h"
#include "sjis.h"
#include "jis.h"
#include "wtf.h"
#include "ucs.h"

#include "map/jisx02132_sjis.map"
wc_uchar *wc_jisx0212_jisx02132_map = jisx02132_sjis_map;

#define C0  WC_SJIS_MAP_C0
#define GL  WC_SJIS_MAP_GL
#define LB  WC_SJIS_MAP_LB
#define S80 WC_SJIS_MAP_80
#define SK  WC_SJIS_MAP_SK
#define SL  WC_SJIS_MAP_SL
#define SH  WC_SJIS_MAP_SH
#define SX  WC_SJIS_MAP_SX
#define C1  WC_SJIS_MAP_C1
#define SA0 WC_SJIS_MAP_A0

wc_uint8 WC_SJIS_MAP[ 0x100 ] = {
    C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
    C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
    GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
    GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, 
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, 
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, 
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, C0,

    S80,SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL,
    SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL,
    SA0,SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK,
    SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK,
    SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK,
    SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK,
    SH, SH, SH, SH, SH, SH, SH, SH, SH, SH, SH, SH, SH, SH, SH, SH,
    SX, SX, SX, SX, SX, SX, SX, SX, SX, SX, SX, SX, SX, C1, C1, C1,
};

#define sjis_to_jisx0208(ub, lb) \
{ \
    ub -= (ub < 0xa0) ? 0x81 : 0xc1; \
    ub = (ub << 1) + 0x21; \
    if (lb < 0x9f) { \
      lb -= (lb > 0x7e) ? 0x20 : 0x1f; \
    } else { \
      ub++; \
      lb -= 0x7e; \
    } \
}
#define sjis_to_jisx02132(ub, lb) \
{ \
    if (lb < 0x9f) { \
      ub = sjis1_jisx02132_map[ub - 0xf0]; \
      lb -= (lb > 0x7e) ? 0x20 : 0x1f; \
    } else { \
      ub = sjis2_jisx02132_map[ub - 0xf0]; \
      lb -= 0x7e; \
    } \
}
#define jisx0208_to_sjis(ub, lb) \
{ \
    lb += (ub & 1) ? 0x1f : 0x7d; \
    if (lb > 0x7e) \
      lb++; \
    ub = (ub - 0x21) >> 1; \
    ub += (ub < 0x1f) ? 0x81 : 0xc1; \
}
#define jisx02132_to_sjis(ub, lb) \
{ \
    lb += (ub & 1) ? 0x1f : 0x7d; \
    if (lb > 0x7e) \
      lb++; \
    ub = jisx02132_sjis_map[ ub ]; \
}

wc_wchar_t
wc_sjis_to_jis(wc_wchar_t cc)
{
    wc_uchar ub, lb;

    ub = cc.code >> 8;
    lb = cc.code & 0xff;
    if (ub < 0xf0) {
      sjis_to_jisx0208(ub, lb);
      cc.ccs = WC_CCS_JIS_X_0208;
    } else {
      sjis_to_jisx02132(ub, lb);
      cc.ccs = WC_CCS_JIS_X_0213_2;
    }
    cc.code = ((wc_uint32)ub << 8) | lb;
    return cc;
}

wc_wchar_t
wc_jis_to_sjis(wc_wchar_t cc)
{
    wc_uchar ub, lb;

    ub = (cc.code >> 8) & 0x7f;
    lb = cc.code & 0x7f;
    if (cc.ccs == WC_CCS_JIS_X_0213_2) {
      jisx02132_to_sjis(ub, lb);
      if (! ub) {
          cc.ccs = WC_CCS_UNKNOWN_W;
          return cc;
      }
    } else {
      jisx0208_to_sjis(ub, lb);
    }
    cc.code = ((wc_uint32)ub << 8) | lb;
    return cc;
}

wc_wchar_t
wc_sjis_ext_to_cs94w(wc_wchar_t cc)
{
    wc_uchar ub, lb;

    ub = cc.code >> 8;
    lb = cc.code & 0xff;
    sjis_to_jisx0208(ub, lb);
    if (ub <= 0x7e) {
      cc.ccs = WC_CCS_SJIS_EXT_1;
    } else {
      ub -= 0x5e;
      cc.ccs = WC_CCS_SJIS_EXT_2;
    }
    cc.code = ((wc_uint32)ub << 8) | lb;
    return cc;
}

wc_wchar_t
wc_cs94w_to_sjis_ext(wc_wchar_t cc)
{
    wc_uchar ub, lb;

    ub = (cc.code >> 8) & 0x7f;
    lb = cc.code & 0x7f;
    if (cc.ccs == WC_CCS_SJIS_EXT_2)
      ub += 0x5e;
    jisx0208_to_sjis(ub, lb);
    cc.ccs = WC_CCS_SJIS_EXT;
    cc.code = ((wc_uint32)ub << 8) | lb;
    return cc;
}

wc_uint32
wc_sjis_ext1_to_N(wc_uint32 c)
{
    wc_uchar ub;

    ub = (c >> 8) & 0x7f;
    switch(ub) {
    case 0x2D:    /* 0x8740 - */
      ub = 0;
      break;
    case 0x79:    /* 0xED40 - */
    case 0x7A:    /* 0xED9F - */
    case 0x7B:    /* 0xEE40 - */
    case 0x7C:    /* 0xEE9F - */
      ub -= 0x78;
      break;
    default:
      return WC_C_SJIS_ERROR;
    }
    return ub * 0x5e + (c & 0x7f) - 0x21;
}

wc_uint32
wc_sjis_ext2_to_N(wc_uint32 c)
{
    wc_uchar ub;

    ub = (c >> 8) & 0x7f;
    switch(ub) {
    case 0x35:    /* 0xFA40 - */
    case 0x36:    /* 0xFA9F - */
    case 0x37:    /* 0xFB40 - */
    case 0x38:    /* 0xFB9F - */
    case 0x39:    /* 0xFC40 - */
      ub -= 0x30;
      break;
    default:
      return WC_C_SJIS_ERROR;
    }
    return ub * 0x5e + (c & 0x7f) - 0x21;
}

Str
wc_conv_from_sjis(Str is, wc_ces ces)
{
    Str os;
    wc_uchar *sp = (wc_uchar *)is->ptr;
    wc_uchar *ep = sp + is->length;
    wc_uchar *p;
    wc_uchar jis[2];
    int state = WC_SJIS_NOSTATE;
    wc_wchar_t cc;

    for (p = sp; p < ep && *p < 0x80; p++)
      ;
    if (p == ep)
      return is;
    os = Strnew_size(is->length);
    if (p > sp)
      Strcat_charp_n(os, is->ptr, (int)(p - sp));

    for (; p < ep; p++) {
      switch (state) {
      case WC_SJIS_NOSTATE:
          switch (WC_SJIS_MAP[*p]) {
          case SL:
            state = WC_SJIS_SHIFT_L;
            break;
          case SH:
            state = WC_SJIS_SHIFT_H;
            break;
          case SX:
            state = WC_SJIS_SHIFT_X;
            break;
          case SK:
            wtf_push(os, WC_CCS_JIS_X_0201K, (wc_uint32)*p);
            break;
          case S80:
          case SA0:
          case C1:
            wtf_push_unknown(os, p, 1);
            break;
          default:
            Strcat_char(os, (char)*p);
            break;
          }
          break;
      case WC_SJIS_SHIFT_L:
      case WC_SJIS_SHIFT_H:
          if (WC_SJIS_MAP[*p] & LB) {
            jis[0] = *(p-1);
            jis[1] = *p;
            sjis_to_jisx0208(jis[0], jis[1]);
            cc.code = ((wc_uint32)jis[0] << 8) | jis[1];
            cc.ccs = wc_jisx0208_or_jisx02131(cc.code);
            if (cc.ccs == WC_CCS_JIS_X_0208)
                wtf_push(os, cc.ccs, cc.code);
            else
                wtf_push(os, WC_CCS_SJIS_EXT, ((wc_uint32)*(p-1) << 8) | *p);
          } else
            wtf_push_unknown(os, p-1, 2);
          state = WC_SJIS_NOSTATE;
          break;
      case WC_SJIS_SHIFT_X:
          if (WC_SJIS_MAP[*p] & LB)
            wtf_push(os, WC_CCS_SJIS_EXT, ((wc_uint32)*(p-1) << 8) | *p);
          else
            wtf_push_unknown(os, p-1, 2);
          state = WC_SJIS_NOSTATE;
          break;
      }
    }
    switch (state) {
    case WC_SJIS_SHIFT_L:
    case WC_SJIS_SHIFT_H:
    case WC_SJIS_SHIFT_X:
      wtf_push_unknown(os, p-1, 1);
      break;
    }
    return os;
}

Str
wc_conv_from_sjisx0213(Str is, wc_ces ces)
{
    Str os;
    wc_uchar *sp = (wc_uchar *)is->ptr;
    wc_uchar *ep = sp + is->length;
    wc_uchar *p;
    wc_uchar jis[2];
    int state = WC_SJIS_NOSTATE;
    wc_wchar_t cc;

    for (p = sp; p < ep && *p < 0x80; p++)
      ;
    if (p == ep)
      return is;
    os = Strnew_size(is->length);
    if (p > sp)
      Strcat_charp_n(os, is->ptr, (int)(p - sp));

    for (; p < ep; p++) {
      switch (state) {
      case WC_SJIS_NOSTATE:
          switch (WC_SJIS_MAP[*p]) {
          case SL:
            state = WC_SJIS_SHIFT_L;
            break;
          case SH:
            state = WC_SJIS_SHIFT_H;
            break;
          case SX:
            state = WC_SJIS_SHIFT_X;
            break;
          case SK:
            wtf_push(os, WC_CCS_JIS_X_0201K, (wc_uint32)*p);
            break;
          case S80:
          case SA0:
          case C1:
            wtf_push_unknown(os, p, 1);
            break;
          default:
            Strcat_char(os, (char)*p);
            break;
          }
          break;
      case WC_SJIS_SHIFT_L:
      case WC_SJIS_SHIFT_H:
          if (WC_SJIS_MAP[*p] & LB) {
            jis[0] = *(p-1);
            jis[1] = *p;
            sjis_to_jisx0208(jis[0], jis[1]);
            cc.code = ((wc_uint32)jis[0] << 8) | jis[1];
            cc.ccs = wc_jisx0208_or_jisx02131(cc.code);
            wtf_push(os, cc.ccs, cc.code);
          } else
            wtf_push_unknown(os, p-1, 2);
          state = WC_SJIS_NOSTATE;
          break;
      case WC_SJIS_SHIFT_X:
          if (WC_SJIS_MAP[*p] & LB) {
            jis[0] = *(p-1);
            jis[1] = *p;
            sjis_to_jisx02132(jis[0], jis[1]);
            wtf_push(os, WC_CCS_JIS_X_0213_2, ((wc_uint32)jis[0] << 8) | jis[1]);
          } else
            wtf_push_unknown(os, p-1, 2);
          state = WC_SJIS_NOSTATE;
          break;
      }
    }
    switch (state) {
    case WC_SJIS_SHIFT_L:
    case WC_SJIS_SHIFT_H:
    case WC_SJIS_SHIFT_X:
      wtf_push_unknown(os, p-1, 1);
      break;
    }
    return os;
}

void
wc_push_to_sjis(Str os, wc_wchar_t cc, wc_status *st)
{
    wc_uchar ub, lb;

  while (1) {
    switch (cc.ccs) {
    case WC_CCS_US_ASCII:
      Strcat_char(os, cc.code);
      return;
    case WC_CCS_JIS_X_0201K:
      if (WcOption.use_jisx0201k) {
          Strcat_char(os, cc.code | 0x80);
          return;
      } else if (WcOption.fix_width_conv)
          cc.ccs = WC_CCS_UNKNOWN;
      else
          cc = wc_jisx0201k_to_jisx0208(cc);
      continue;
    case WC_CCS_JIS_X_0208:
      ub = (cc.code >> 8) & 0x7f;
      lb = cc.code & 0x7f;
      jisx0208_to_sjis(ub, lb);
      Strcat_char(os, ub);
      Strcat_char(os, lb);
      return;
    case WC_CCS_SJIS_EXT_1:
    case WC_CCS_SJIS_EXT_2:
      cc = wc_cs94w_to_sjis_ext(cc);
    case WC_CCS_SJIS_EXT:
      Strcat_char(os, (char)(cc.code >> 8));
      Strcat_char(os, (char)(cc.code & 0xff));
      return;
    case WC_CCS_UNKNOWN_W:
      if (!WcOption.no_replace)
          Strcat_charp(os, WC_REPLACE_W);
      return;
    case WC_CCS_UNKNOWN:
      if (!WcOption.no_replace)
          Strcat_charp(os, WC_REPLACE);
      return;
    default:
#ifdef USE_UNICODE
      if (WcOption.ucs_conv)
          cc = wc_any_to_any_ces(cc, st);
      else
#endif
          cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
      continue;
    }
  }
}

void
wc_push_to_sjisx0213(Str os, wc_wchar_t cc, wc_status *st)
{
    wc_uchar ub, lb;

  while (1) {
    switch (cc.ccs) {
    case WC_CCS_US_ASCII:
      Strcat_char(os, cc.code);
      return;
    case WC_CCS_JIS_X_0201K:
      if (WcOption.use_jisx0201k) {
          Strcat_char(os, cc.code | 0x80);
          return;
      } else if (WcOption.fix_width_conv)
          cc.ccs = WC_CCS_UNKNOWN;
      else
          cc = wc_jisx0201k_to_jisx0208(cc);
      continue;
    case WC_CCS_JIS_X_0213_1:
      if (! WcOption.use_jisx0213) {
          cc.ccs = WC_CCS_UNKNOWN_W;
          continue;
      }
    case WC_CCS_JIS_X_0208:
      ub = (cc.code >> 8) & 0x7f;
      lb = cc.code & 0x7f;
      jisx0208_to_sjis(ub, lb);
      Strcat_char(os, ub);
      Strcat_char(os, lb);
      return;
    case WC_CCS_JIS_X_0213_2:
      if (! WcOption.use_jisx0213) {
          cc.ccs = WC_CCS_UNKNOWN_W;
          continue;
      }
      ub = (cc.code >> 8) & 0x7f;
      lb = cc.code & 0x7f;
      jisx02132_to_sjis(ub, lb);
      if (ub) {
          Strcat_char(os, ub);
          Strcat_char(os, lb);
          return;
      }
    case WC_CCS_UNKNOWN_W:
      if (!WcOption.no_replace)
          Strcat_charp(os, WC_REPLACE_W);
      return;
    case WC_CCS_UNKNOWN:
      if (!WcOption.no_replace)
          Strcat_charp(os, WC_REPLACE);
      return;
    default:
#ifdef USE_UNICODE
      if (WcOption.ucs_conv)
          cc = wc_any_to_any_ces(cc, st);
      else
#endif
          cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
      continue;
    }
  }
}

Str
wc_char_conv_from_sjis(wc_uchar c, wc_status *st)
{
    static Str os;
    static wc_uchar jis[2];
    wc_wchar_t cc;

    if (st->state == -1) {
      st->state = WC_SJIS_NOSTATE;
      os = Strnew_size(8);
    }

    switch (st->state) {
    case WC_SJIS_NOSTATE:
      switch (WC_SJIS_MAP[c]) {
      case SL:
          jis[0] = c;
          st->state = WC_SJIS_SHIFT_L;
          return NULL;
      case SH:
          jis[0] = c;
          st->state = WC_SJIS_SHIFT_H;
          return NULL;
      case SX:
          jis[0] = c;
          st->state = WC_SJIS_SHIFT_X;
          return NULL;
      case SK:
          wtf_push(os, WC_CCS_JIS_X_0201K, (wc_uint32)c);
          break;
      case S80:
      case SA0:
      case C1:
          break;
      default:
          Strcat_char(os, (char)c);
          break;
      }
      break;
    case WC_SJIS_SHIFT_L:
    case WC_SJIS_SHIFT_H:
      if (WC_SJIS_MAP[c] & LB) {
          jis[1] = c;
          sjis_to_jisx0208(jis[0], jis[1]);
          cc.code = ((wc_uint32)jis[0] << 8) | jis[1];
          cc.ccs = wc_jisx0208_or_jisx02131(cc.code);
          if (cc.ccs == WC_CCS_JIS_X_0208)
            wtf_push(os, cc.ccs, cc.code);
          else
              wtf_push(os, WC_CCS_SJIS_EXT, ((wc_uint32)jis[0] << 8) | jis[1]);
      }
      st->state = WC_SJIS_NOSTATE;
      break;
    case WC_SJIS_SHIFT_X:
      if (WC_SJIS_MAP[c] & LB) {
          jis[1] = c;
          wtf_push(os, WC_CCS_SJIS_EXT, ((wc_uint32)jis[0] << 8) | jis[1]);
      }
      st->state = WC_SJIS_NOSTATE;
      break;
    }
    st->state = -1;
    return os;
}

Str
wc_char_conv_from_sjisx0213(wc_uchar c, wc_status *st)
{
    static Str os;
    static wc_uchar jis[2];
    wc_wchar_t cc;

    if (st->state == -1) {
      st->state = WC_SJIS_NOSTATE;
      os = Strnew_size(8);
    }

    switch (st->state) {
    case WC_SJIS_NOSTATE:
      switch (WC_SJIS_MAP[c]) {
      case SL:
          jis[0] = c;
          st->state = WC_SJIS_SHIFT_L;
          return NULL;
      case SH:
          jis[0] = c;
          st->state = WC_SJIS_SHIFT_H;
          return NULL;
      case SX:
          jis[0] = c;
          st->state = WC_SJIS_SHIFT_X;
          return NULL;
      case SK:
          wtf_push(os, WC_CCS_JIS_X_0201K, (wc_uint32)c);
          break;
      case S80:
      case SA0:
      case C1:
          break;
      default:
          Strcat_char(os, (char)c);
          break;
      }
      break;
    case WC_SJIS_SHIFT_L:
    case WC_SJIS_SHIFT_H:
      if (WC_SJIS_MAP[c] & LB) {
          jis[1] = c;
          sjis_to_jisx0208(jis[0], jis[1]);
          cc.code = ((wc_uint32)jis[0] << 8) | jis[1];
          cc.ccs = wc_jisx0208_or_jisx02131(cc.code);
          wtf_push(os, cc.ccs, cc.code);
      }
      st->state = WC_SJIS_NOSTATE;
      break;
    case WC_SJIS_SHIFT_X:
      if (WC_SJIS_MAP[c] & LB) {
          jis[1] = c;
          sjis_to_jisx02132(jis[0], jis[1]);
          wtf_push(os, WC_CCS_JIS_X_0213_2, ((wc_uint32)jis[0] << 8) | jis[1]);
      }
      st->state = WC_SJIS_NOSTATE;
      break;
    }
    st->state = -1;
    return os;
}

Generated by  Doxygen 1.6.0   Back to index