Legacy Document

Important: The information in this document is obsolete and should not be used for new development.

Up Previous Next 

PATH 
Mac OS 8 and 9 Developer Documentation > Text Encoding Conversion Manager
Programming With the Text Encoding Conversion Manager



Text Encoding Base

You use the base text encoding data type to specify which text encoding or text encoding scheme is used to express a given piece of text. The text encoding base value is the primary specification of the source or target encoding. Values 0 through 32 correspond directly to Mac OS script codes. Values 33 through 254 are for other Mac OS encodings that do not have their own script codes, such as the Symbol encoding implemented by the Symbol font. You can also specify a meta-value, such as kTextEncodingMacHFS or kTextEncodingUnicodeDefault, as a base text encoding; a meta-value is mapped to a real value.

The function GetTextEncodingBase returns the text encoding base of a text encoding specification.

A base text encoding is defined by the TextEncodingBase data type.

/* Base text encoding: the primary specification of a source or target
   encoding. Holds one of the kTextEncoding... base constants; it is the
   type of value that GetTextEncodingBase returns for a text encoding
   specification. */
typedef UInt32 TextEncodingBase;

You can use these enumerated constants to specify base text encodings:

/*
 * Base text encoding constants (values for TextEncodingBase).
 *
 * The values are grouped by range: Mac OS encodings occupy 0 through 0xFF,
 * and each later family (Unicode, ISO, DOS/Windows, national standards,
 * ISO 2022, EUC, miscellaneous, other platforms, EBCDIC) starts at its own
 * 0x100-aligned base, as noted in the section comments below. Several names
 * are deliberate aliases that share one value.
 */
enum {
    /* Mac OS encodings */
    kTextEncodingMacRoman = 0L,
    kTextEncodingMacJapanese = 1,
    kTextEncodingMacChineseTrad = 2,
    kTextEncodingMacKorean = 3,
    kTextEncodingMacArabic = 4,
    kTextEncodingMacHebrew = 5,
    kTextEncodingMacGreek = 6,
    kTextEncodingMacCyrillic = 7,
    /* Note: no constant is defined for value 8 in this list. */
    kTextEncodingMacDevanagari = 9,
    kTextEncodingMacGurmukhi = 10,
    kTextEncodingMacGujarati = 11,
    kTextEncodingMacOriya = 12,
    kTextEncodingMacBengali = 13,
    kTextEncodingMacTamil = 14,
    kTextEncodingMacTelugu = 15,
    kTextEncodingMacKannada = 16,
    kTextEncodingMacMalayalam = 17,
    kTextEncodingMacSinhalese = 18,
    kTextEncodingMacBurmese = 19,
    kTextEncodingMacKhmer = 20,
    kTextEncodingMacThai = 21,
    kTextEncodingMacLaotian = 22,
    kTextEncodingMacGeorgian = 23,
    kTextEncodingMacArmenian = 24,
    kTextEncodingMacChineseSimp = 25,
    kTextEncodingMacTibetan = 26,
    kTextEncodingMacMongolian = 27,
    kTextEncodingMacEthiopic = 28,
    kTextEncodingMacCentralEurRoman = 29,
    kTextEncodingMacVietnamese = 30,
    kTextEncodingMacExtArabic = 31,

    /* The following use script code 0, smRoman */
    kTextEncodingMacSymbol = 33,
    kTextEncodingMacDingbats = 34,
    kTextEncodingMacTurkish = 35,
    kTextEncodingMacCroatian = 36,
    kTextEncodingMacIcelandic = 37,
    kTextEncodingMacRomanian = 38,
    kTextEncodingMacCeltic = 39,
    kTextEncodingMacGaelic = 40,

    /* Beginning in Mac OS 8.5, the set of Mac OS script codes has been */
    /* extended for some Mac OS components to include Unicode. Some of */
    /* these components have only 7 bits available for script code, so */
    /* kTextEncodingUnicodeDefault cannot be used to indicate Unicode. */
    /* Instead, the following meta-value is used to indicate Unicode */
    /* handled as a special Mac OS script code; TEC handles this value */
    /* like kTextEncodingUnicodeDefault. */

    kTextEncodingMacUnicode = 0x7E, /* Meta-value, Unicode as Mac encoding */

    /* The following use script code 4, smArabic */
    kTextEncodingMacFarsi = 0x8C, /* Like MacArabic but uses Farsi digits */

    /* The following use script code 28, smEthiopic */
    kTextEncodingMacInuit = 0xEC,

    /* The following use script code 32, smUninterp */
    kTextEncodingMacVT100 = 0xFC, /* VT100/102 font: Latin-1 chars, box dwg... */

    /* Special Mac OS encodings */
    kTextEncodingMacHFS = 0xFF, /* Meta-value. */

    /* Unicode & ISO UCS encodings begin at 0x100 */
    kTextEncodingUnicodeDefault = 0x100, /* Meta-value. */
    kTextEncodingUnicodeV1_1 = 0x101,
    kTextEncodingISO10646_1993 = 0x101, /* code points identical to Unicode 1.1 */
    kTextEncodingUnicodeV2_0 = 0x103, /* new location for Korean Hangul */
    kTextEncodingUnicodeV2_1 = 0x103, /* For TEC, Unicode 2.0 = 2.1 */

    /* ISO 8-bit and 7-bit encodings begin at 0x200 */
    kTextEncodingISOLatin1 = 0x201, /* ISO 8859-1 */
    kTextEncodingISOLatin2 = 0x202, /* ISO 8859-2 */
    kTextEncodingISOLatin3 = 0x203, /* ISO 8859-3 */
    kTextEncodingISOLatin4 = 0x204, /* ISO 8859-4 */
    kTextEncodingISOLatinCyrillic = 0x205, /* ISO 8859-5 */
    kTextEncodingISOLatinArabic = 0x206, /* ISO 8859-6, = ASMO 708, =DOS CP 708 */
    kTextEncodingISOLatinGreek = 0x207, /* ISO 8859-7 */
    kTextEncodingISOLatinHebrew = 0x208, /* ISO 8859-8 */
    kTextEncodingISOLatin5 = 0x209, /* ISO 8859-9 */
    kTextEncodingISOLatin6 = 0x020A, /* ISO 8859-10 */
    kTextEncodingISOLatin7 = 0x020D, /* ISO 8859-13, Baltic Rim */
    kTextEncodingISOLatin8 = 0x020E, /* ISO 8859-14, Celtic */
    kTextEncodingISOLatin9 = 0x020F, /* ISO 8859-15, 8859-1 + EURO etc */


    /* MS-DOS & Windows encodings begin at 0x400 */
    kTextEncodingDOSLatinUS = 0x400, /* code page 437 */
    kTextEncodingDOSGreek = 0x405, /* code page 737 (formerly 437G) */
    kTextEncodingDOSBalticRim = 0x406, /* code page 775 */
    kTextEncodingDOSLatin1 = 0x410, /* code page 850, "Multilingual" */
    kTextEncodingDOSGreek1 = 0x411, /* code page 851 */
    kTextEncodingDOSLatin2 = 0x412, /* code page 852, Slavic */
    kTextEncodingDOSCyrillic = 0x413, /* code page 855, IBM Cyrillic */
    kTextEncodingDOSTurkish = 0x414, /* code page 857, IBM Turkish */
    kTextEncodingDOSPortuguese = 0x415, /* code page 860 */
    kTextEncodingDOSIcelandic = 0x416, /* code page 861 */
    kTextEncodingDOSHebrew = 0x417, /* code page 862 */
    kTextEncodingDOSCanadianFrench = 0x418, /* code page 863 */
    kTextEncodingDOSArabic = 0x419, /* code page 864 */
    kTextEncodingDOSNordic = 0x41A, /* code page 865 */
    kTextEncodingDOSRussian = 0x41B, /* code page 866 */
    kTextEncodingDOSGreek2 = 0x41C, /* code page 869, IBM Modern Greek */
    kTextEncodingDOSThai = 0x41D, /* code page 874, also for Windows */
    kTextEncodingDOSJapanese = 0x420, /* code page 932, also for Windows */
    kTextEncodingDOSChineseSimplif = 0x421, /* code page 936, also for Windows */
    kTextEncodingDOSKorean = 0x422, /* code page 949, also for Windows;Unified Hangul */
    kTextEncodingDOSChineseTrad = 0x423, /* code page 950, also for Windows */
    kTextEncodingWindowsLatin1 = 0x500, /* code page 1252 */
    kTextEncodingWindowsANSI = 0x500, /* code page 1252 (alternate name) */
    kTextEncodingWindowsLatin2 = 0x501, /* code page 1250, Central Europe */
    kTextEncodingWindowsCyrillic = 0x502, /* code page 1251, Slavic Cyrillic */
    kTextEncodingWindowsGreek = 0x503, /* code page 1253 */
    kTextEncodingWindowsLatin5 = 0x504, /* code page 1254, Turkish */
    kTextEncodingWindowsHebrew = 0x505, /* code page 1255 */
    kTextEncodingWindowsArabic = 0x506, /* code page 1256 */
    kTextEncodingWindowsBalticRim = 0x507, /* code page 1257 */
    kTextEncodingWindowsVietnamese = 0x508, /* code page 1258 */
    kTextEncodingWindowsKoreanJohab = 0x510, /* code page 1361, for Windows NT */

    /* Various national standards begin at 0x600 */
    kTextEncodingUS_ASCII = 0x600,
    kTextEncodingJIS_X0201_76 = 0x620,
    kTextEncodingJIS_X0208_83 = 0x621,
    kTextEncodingJIS_X0208_90 = 0x622,
    kTextEncodingJIS_X0212_90 = 0x623,
    kTextEncodingJIS_C6226_78 = 0x624,
    kTextEncodingGB_2312_80 = 0x630,
    kTextEncodingGBK_95 = 0x631, /* annex to GB 13000-93; for Windows 95 */
    kTextEncodingKSC_5601_87 = 0x640, /* same as KSC 5601-92 without Johab annex */
    kTextEncodingKSC_5601_92_Johab = 0x641, /* KSC 5601-92 Johab annex */
    kTextEncodingCNS_11643_92_P1 = 0x651, /* CNS 11643-1992 plane 1 */
    kTextEncodingCNS_11643_92_P2 = 0x652, /* CNS 11643-1992 plane 2 */
    kTextEncodingCNS_11643_92_P3 = 0x653, /* CNS 11643-1992 plane 3
                                         (11643-1986 plane 14) */

    /* ISO 2022 collections begin at 0x800 */
    kTextEncodingISO_2022_JP = 0x820,
    kTextEncodingISO_2022_JP_2 = 0x821,
    kTextEncodingISO_2022_CN = 0x830,
    kTextEncodingISO_2022_CN_EXT = 0x831,
    kTextEncodingISO_2022_KR = 0x840,

    /* EUC collections begin at 0x900 */
    kTextEncodingEUC_JP = 0x920, /* ISO 646,1-byte Katakana,JIS 208,JIS 212 */
    kTextEncodingEUC_CN = 0x930, /* ISO 646, GB 2312-80 */
    kTextEncodingEUC_TW = 0x931, /* ISO 646, CNS 11643-1992 Planes 1-16 */
    kTextEncodingEUC_KR = 0x940, /* ISO 646, KS C 5601-1987 */

    /* Miscellaneous standards begin at 0xA00 */
    kTextEncodingShiftJIS = 0xA01, /* plain Shift-JIS */
    kTextEncodingKOI8_R = 0xA02, /* Russian Internet standard */
    kTextEncodingBig5 = 0xA03, /* Big-5 */
    kTextEncodingMacRomanLatin1 = 0xA04, /* Mac OS Roman permuted to align
                                            with 8859-1 */
    kTextEncodingHZ_GB_2312 = 0xA05, /* HZ (RFC 1842, for Chinese mail & news) */

    /* Other platform encodings */
    kTextEncodingNextStepLatin = 0xB01, /* NextStep encoding */

    /* EBCDIC & IBM host encodings begin at 0xC00 */
    kTextEncodingEBCDIC_US = 0xC01, /* basic EBCDIC-US */
    kTextEncodingEBCDIC_CP037 = 0xC02, /* code page 037, extended EBCDIC-US Latin1 */

    /* Special value */
    kTextEncodingMultiRun = 0xFFF, /* Multiple encoded text, external run info */
    kTextEncodingUnknown = 0xFFFF /* Unknown or unspecified */
};


© 1999 Apple Computer, Inc. – (Last Updated 13 Dec 99)

Up Previous Next