/*
 * Mac OS 9
 * TextCommon.h
 * Source listing of this file; see the documentation of this file for details.
 */
19 #ifndef __TEXTCOMMON__
20 #define __TEXTCOMMON__
21 
22 #ifndef __MACTYPES__
23 #include <MacTypes.h>
24 #endif
25 
26 #if PRAGMA_ONCE
27 #pragma once
28 #endif
29 
30 #ifdef __cplusplus
31 extern "C"
32 {
33 #endif
34 
35 #if PRAGMA_IMPORT
36 #pragma import on
37 #endif
38 
39 #if PRAGMA_STRUCT_ALIGN
40 #pragma options align = mac68k
41 #elif PRAGMA_STRUCT_PACKPUSH
42 #pragma pack(push, 2)
43 #elif PRAGMA_STRUCT_PACK
44 #pragma pack(2)
45 #endif
46 
47  /* TextEncodingBase type & values */
48  /* (values 0-32 correspond to the Script Codes defined in Inside Macintosh: Text
49  * pages 6-52 and 6-53 */
50  typedef UInt32 TextEncodingBase;
/* TextEncodingBase values for the Mac OS encodings. */
enum
{
  /* Mac OS encodings*/
  kTextEncodingMacRoman = 0L,
  kTextEncodingMacJapanese = 1,
  kTextEncodingMacChineseTrad = 2,
  kTextEncodingMacKorean = 3,
  kTextEncodingMacArabic = 4,
  kTextEncodingMacHebrew = 5,
  kTextEncodingMacGreek = 6,
  kTextEncodingMacCyrillic = 7,
  kTextEncodingMacDevanagari = 9,
  kTextEncodingMacGurmukhi = 10,
  kTextEncodingMacGujarati = 11,
  kTextEncodingMacOriya = 12,
  kTextEncodingMacBengali = 13,
  kTextEncodingMacTamil = 14,
  kTextEncodingMacTelugu = 15,
  kTextEncodingMacKannada = 16,
  kTextEncodingMacMalayalam = 17,
  kTextEncodingMacSinhalese = 18,
  kTextEncodingMacBurmese = 19,
  kTextEncodingMacKhmer = 20,
  kTextEncodingMacThai = 21,
  kTextEncodingMacLaotian = 22,
  kTextEncodingMacGeorgian = 23,
  kTextEncodingMacArmenian = 24,
  kTextEncodingMacChineseSimp = 25,
  kTextEncodingMacTibetan = 26,
  kTextEncodingMacMongolian = 27,
  kTextEncodingMacEthiopic = 28,
  kTextEncodingMacCentralEurRoman = 29,
  kTextEncodingMacVietnamese = 30,
  kTextEncodingMacExtArabic = 31,
  /* The following use script code 0, smRoman*/
  kTextEncodingMacSymbol = 33,
  kTextEncodingMacDingbats = 34,
  kTextEncodingMacTurkish = 35,
  kTextEncodingMacCroatian = 36,
  kTextEncodingMacIcelandic = 37,
  kTextEncodingMacRomanian = 38,
  kTextEncodingMacCeltic = 39,
  kTextEncodingMacGaelic = 40,
  kTextEncodingMacKeyboardGlyphs = 41
};

/* The following are older names for backward compatibility*/
enum
{
  kTextEncodingMacTradChinese = kTextEncodingMacChineseTrad,
  kTextEncodingMacRSymbol = 8,
  kTextEncodingMacSimpChinese = kTextEncodingMacChineseSimp,
  kTextEncodingMacGeez = kTextEncodingMacEthiopic,
  kTextEncodingMacEastEurRoman = kTextEncodingMacCentralEurRoman,
  kTextEncodingMacUninterp = 32
};
/* Unicode expressed as a Mac OS encoding value. */
enum
{
  kTextEncodingMacUnicode = 0x7E /* Meta-value, Unicode as a Mac encoding*/
};
/* Variant Mac OS encodings that use script codes other than 0*/
enum
{
  /* The following use script code 4, smArabic*/
  kTextEncodingMacFarsi = 0x8C, /* Like MacArabic but uses Farsi digits*/
  /* The following use script code 7, smCyrillic*/
  kTextEncodingMacUkrainian = 0x98, /* Meta-value in TEC 1.5 & later; maps to
                                       kTextEncodingMacCyrillic variant */
  /* The following use script code 28, smEthiopic*/
  kTextEncodingMacInuit = 0xEC,
  /* The following use script code 32, smUnimplemented*/
  kTextEncodingMacVT100 = 0xFC /* VT100/102 font from Comm Toolbox: Latin-1
                                  repertoire + box drawing etc*/
};
/* Special Mac OS encodings*/
enum
{
  kTextEncodingMacHFS = 0xFF /* Meta-value, should never appear in a table.*/
};
/* Unicode & ISO UCS encodings begin at 0x100*/
enum
{
  kTextEncodingUnicodeDefault = 0x0100, /* Meta-value, should never appear in a table.*/
  kTextEncodingUnicodeV1_1 = 0x0101,
  kTextEncodingISO10646_1993 = 0x0101,  /* Code points identical to Unicode 1.1*/
  kTextEncodingUnicodeV2_0 = 0x0103,    /* New location for Korean Hangul*/
  kTextEncodingUnicodeV2_1 = 0x0103,    /* We treat both Unicode 2.0 and Unicode 2.1 as 2.1*/
  kTextEncodingUnicodeV3_0 = 0x0104,
  kTextEncodingUnicodeV3_1 = 0x0105,    /* Adds characters requiring surrogate pairs in UTF-16*/
  kTextEncodingUnicodeV3_2 = 0x0106
};
/* ISO 8-bit and 7-bit encodings begin at 0x200*/
enum
{
  kTextEncodingISOLatin1 = 0x0201,        /* ISO 8859-1*/
  kTextEncodingISOLatin2 = 0x0202,        /* ISO 8859-2*/
  kTextEncodingISOLatin3 = 0x0203,        /* ISO 8859-3*/
  kTextEncodingISOLatin4 = 0x0204,        /* ISO 8859-4*/
  kTextEncodingISOLatinCyrillic = 0x0205, /* ISO 8859-5*/
  kTextEncodingISOLatinArabic = 0x0206,   /* ISO 8859-6, = ASMO 708, =DOS CP 708*/
  kTextEncodingISOLatinGreek = 0x0207,    /* ISO 8859-7*/
  kTextEncodingISOLatinHebrew = 0x0208,   /* ISO 8859-8*/
  kTextEncodingISOLatin5 = 0x0209,        /* ISO 8859-9*/
  kTextEncodingISOLatin6 = 0x020A,        /* ISO 8859-10 */
  kTextEncodingISOLatin7 = 0x020D,        /* ISO 8859-13, Baltic Rim */
  kTextEncodingISOLatin8 = 0x020E,        /* ISO 8859-14, Celtic */
  kTextEncodingISOLatin9 = 0x020F         /* ISO 8859-15, 8859-1 changed for EURO & CP1252 letters */
};
/* MS-DOS & Windows encodings begin at 0x400*/
enum
{
  kTextEncodingDOSLatinUS = 0x0400,        /* code page 437*/
  kTextEncodingDOSGreek = 0x0405,          /* code page 737 (formerly code page 437G)*/
  kTextEncodingDOSBalticRim = 0x0406,      /* code page 775*/
  kTextEncodingDOSLatin1 = 0x0410,         /* code page 850, "Multilingual"*/
  kTextEncodingDOSGreek1 = 0x0411,         /* code page 851*/
  kTextEncodingDOSLatin2 = 0x0412,         /* code page 852, Slavic*/
  kTextEncodingDOSCyrillic = 0x0413,       /* code page 855, IBM Cyrillic*/
  kTextEncodingDOSTurkish = 0x0414,        /* code page 857, IBM Turkish*/
  kTextEncodingDOSPortuguese = 0x0415,     /* code page 860*/
  kTextEncodingDOSIcelandic = 0x0416,      /* code page 861*/
  kTextEncodingDOSHebrew = 0x0417,         /* code page 862*/
  kTextEncodingDOSCanadianFrench = 0x0418, /* code page 863*/
  kTextEncodingDOSArabic = 0x0419,         /* code page 864*/
  kTextEncodingDOSNordic = 0x041A,         /* code page 865*/
  kTextEncodingDOSRussian = 0x041B,        /* code page 866*/
  kTextEncodingDOSGreek2 = 0x041C,         /* code page 869, IBM Modern Greek*/
  kTextEncodingDOSThai = 0x041D,           /* code page 874, also for Windows*/
  kTextEncodingDOSJapanese = 0x0420,       /* code page 932, also for Windows; Shift-JIS
                                              with additions*/
  kTextEncodingDOSChineseSimplif = 0x0421, /* code page 936, also for Windows; was
                                              EUC-CN, now GBK (EUC-CN extended)*/
  kTextEncodingDOSKorean = 0x0422,         /* code page 949, also for Windows; Unified
                                              Hangul Code (EUC-KR extended)*/
  kTextEncodingDOSChineseTrad = 0x0423,    /* code page 950, also for Windows; Big-5*/
  kTextEncodingWindowsLatin1 = 0x0500,     /* code page 1252*/
  kTextEncodingWindowsANSI = 0x0500,       /* code page 1252 (alternate name)*/
  kTextEncodingWindowsLatin2 = 0x0501,     /* code page 1250, Central Europe*/
  kTextEncodingWindowsCyrillic = 0x0502,   /* code page 1251, Slavic Cyrillic*/
  kTextEncodingWindowsGreek = 0x0503,      /* code page 1253*/
  kTextEncodingWindowsLatin5 = 0x0504,     /* code page 1254, Turkish*/
  kTextEncodingWindowsHebrew = 0x0505,     /* code page 1255*/
  kTextEncodingWindowsArabic = 0x0506,     /* code page 1256*/
  kTextEncodingWindowsBalticRim = 0x0507,  /* code page 1257*/
  kTextEncodingWindowsVietnamese = 0x0508, /* code page 1258*/
  kTextEncodingWindowsKoreanJohab = 0x0510 /* code page 1361, for Windows NT*/
};
/* Various national standards begin at 0x600*/
enum
{
  kTextEncodingUS_ASCII = 0x0600,
  kTextEncodingJIS_X0201_76 = 0x0620,      /* JIS Roman and 1-byte katakana (halfwidth)*/
  kTextEncodingJIS_X0208_83 = 0x0621,
  kTextEncodingJIS_X0208_90 = 0x0622,
  kTextEncodingJIS_X0212_90 = 0x0623,
  kTextEncodingJIS_C6226_78 = 0x0624,
  kTextEncodingShiftJIS_X0213_00 = 0x0628, /* Shift-JIS format encoding of JIS X0213
                                              planes 1 and 2*/
  kTextEncodingGB_2312_80 = 0x0630,
  kTextEncodingGBK_95 = 0x0631,            /* annex to GB 13000-93; for Windows 95;
                                              EUC-CN extended*/
  kTextEncodingGB_18030_2000 = 0x0632,
  kTextEncodingKSC_5601_87 = 0x0640,       /* same as KSC 5601-92 without Johab annex*/
  kTextEncodingKSC_5601_92_Johab = 0x0641, /* KSC 5601-92 Johab annex*/
  kTextEncodingCNS_11643_92_P1 = 0x0651,   /* CNS 11643-1992 plane 1*/
  kTextEncodingCNS_11643_92_P2 = 0x0652,   /* CNS 11643-1992 plane 2*/
  kTextEncodingCNS_11643_92_P3 = 0x0653    /* CNS 11643-1992 plane 3 (was plane 14 in
                                              1986 version)*/
};
/* ISO 2022 collections begin at 0x800*/
enum
{
  kTextEncodingISO_2022_JP = 0x0820,   /* RFC 1468*/
  kTextEncodingISO_2022_JP_2 = 0x0821, /* RFC 1554*/
  kTextEncodingISO_2022_JP_1 = 0x0822, /* RFC 2237*/
  kTextEncodingISO_2022_JP_3 = 0x0823, /* JIS X0213*/
  kTextEncodingISO_2022_CN = 0x0830,
  kTextEncodingISO_2022_CN_EXT = 0x0831,
  kTextEncodingISO_2022_KR = 0x0840
};
/* EUC collections begin at 0x900*/
enum
{
  kTextEncodingEUC_JP = 0x0920, /* ISO 646, 1-byte katakana, JIS 208, JIS 212*/
  kTextEncodingEUC_CN = 0x0930, /* ISO 646, GB 2312-80*/
  kTextEncodingEUC_TW = 0x0931, /* ISO 646, CNS 11643-1992 Planes 1-16*/
  kTextEncodingEUC_KR = 0x0940  /* ISO 646, KS C 5601-1987*/
};
/* Misc standards begin at 0xA00*/
enum
{
  kTextEncodingShiftJIS = 0x0A01,       /* plain Shift-JIS*/
  kTextEncodingKOI8_R = 0x0A02,         /* Russian internet standard*/
  kTextEncodingBig5 = 0x0A03,           /* Big-5 (has variants)*/
  kTextEncodingMacRomanLatin1 = 0x0A04, /* Mac OS Roman permuted to align with ISO
                                           Latin-1*/
  kTextEncodingHZ_GB_2312 = 0x0A05,     /* HZ (RFC 1842, for Chinese mail & news)*/
  kTextEncodingBig5_HKSCS_1999 = 0x0A06 /* Big-5 with Hong Kong special char set
                                           supplement*/
};
/* Other platform encodings*/
enum
{
  kTextEncodingNextStepLatin = 0x0B01 /* NextStep encoding*/
};
/* EBCDIC & IBM host encodings begin at 0xC00*/
enum
{
  kTextEncodingEBCDIC_US = 0x0C01,   /* basic EBCDIC-US*/
  kTextEncodingEBCDIC_CP037 = 0x0C02 /* code page 037, extended EBCDIC (Latin-1 set)
                                        for US,Canada...*/
};
/* Special values*/
enum
{
  kTextEncodingMultiRun = 0x0FFF, /* Multi-encoding text with external run info*/
  kTextEncodingUnknown = 0xFFFF   /* Unknown or unspecified */
};
301  /* TextEncodingVariant type & values */
302  typedef UInt32 TextEncodingVariant;
/* Default TextEncodingVariant, for any TextEncodingBase*/
enum
{
  kTextEncodingDefaultVariant = 0
};
/* Variants of kTextEncodingMacRoman */
enum
{
  kMacRomanDefaultVariant = 0,      /* meta value, maps to 1 or 2 depending on System */
  kMacRomanCurrencySignVariant = 1, /* Mac OS version < 8.5, 0xDB is CURRENCY SIGN*/
  kMacRomanEuroSignVariant = 2      /* Mac OS version >= 8.5, 0xDB is EURO SIGN */
};
/* Variants of kTextEncodingMacCyrillic (for TEC 1.5 and later) */
enum
{
  kMacCyrillicDefaultVariant = 0,     /* meta value, maps to 1, 2, or 3 depending on
                                         System*/
  kMacCyrillicCurrSignStdVariant = 1, /* Mac OS < 9.0 (RU,BG), 0xFF = CURRENCY SIGN,
                                         0xA2/0xB6 = CENT / PARTIAL DIFF.*/
  kMacCyrillicCurrSignUkrVariant = 2, /* Mac OS < 9.0 (UA,LangKit), 0xFF = CURRENCY
                                         SIGN, 0xA2/0xB6 = GHE WITH UPTURN*/
  kMacCyrillicEuroSignVariant = 3     /* Mac OS >= 9.0, 0xFF is EURO SIGN, 0xA2/0xB6 =
                                         GHE WITH UPTURN*/
};
/* Variants of kTextEncodingMacIcelandic */
enum
{
  kMacIcelandicStdDefaultVariant = 0,  /* meta value, maps to 2 or 4 depending on
                                          System */
  kMacIcelandicTTDefaultVariant = 1,   /* meta value, maps to 3 or 5 depending on
                                          System */
  /* The following are for Mac OS version < 8.5, 0xDB is CURRENCY SIGN */
  kMacIcelandicStdCurrSignVariant = 2, /* 0xBB/0xBC are fem./masc. ordinal indicators*/
  kMacIcelandicTTCurrSignVariant = 3,  /* 0xBB/0xBC are fi/fl ligatures*/
  /* The following are for Mac OS version >= 8.5, 0xDB is EURO SIGN */
  kMacIcelandicStdEuroSignVariant = 4, /* 0xBB/0xBC are fem./masc. ordinal indicators*/
  kMacIcelandicTTEuroSignVariant = 5   /* 0xBB/0xBC are fi/fl ligatures*/
};
/* Variants of kTextEncodingMacCroatian */
enum
{
  kMacCroatianDefaultVariant = 0,      /* meta value, maps to 1 or 2 depending on
                                          System */
  kMacCroatianCurrencySignVariant = 1, /* Mac OS version < 8.5, 0xDB is CURRENCY SIGN */
  kMacCroatianEuroSignVariant = 2      /* Mac OS version >= 8.5, 0xDB is EURO SIGN */
};
/* Variants of kTextEncodingMacRomanian */
enum
{
  kMacRomanianDefaultVariant = 0,      /* meta value, maps to 1 or 2 depending on
                                          System */
  kMacRomanianCurrencySignVariant = 1, /* Mac OS version < 8.5, 0xDB is CURRENCY SIGN */
  kMacRomanianEuroSignVariant = 2      /* Mac OS version >= 8.5, 0xDB is EURO SIGN */
};
/* Variants of kTextEncodingMacJapanese*/
enum
{
  kMacJapaneseStandardVariant = 0,
  kMacJapaneseStdNoVerticalsVariant = 1,
  kMacJapaneseBasicVariant = 2,
  kMacJapanesePostScriptScrnVariant = 3,
  kMacJapanesePostScriptPrintVariant = 4,
  kMacJapaneseVertAtKuPlusTenVariant = 5
};
/* Variants of kTextEncodingMacArabic*/
enum
{
  kMacArabicStandardVariant = 0, /* 0xC0 is 8-spoke asterisk, 0x2A & 0xAA are asterisk
                                    (e.g. Cairo)*/
  kMacArabicTrueTypeVariant = 1, /* 0xC0 is asterisk, 0x2A & 0xAA are multiply signs
                                    (e.g. Baghdad)*/
  kMacArabicThuluthVariant = 2,  /* 0xC0 is Arabic five-point star, 0x2A & 0xAA are
                                    multiply signs*/
  kMacArabicAlBayanVariant = 3   /* 8-spoke asterisk, multiply sign, Koranic ligatures
                                    & parens*/
};
/* Variants of kTextEncodingMacFarsi*/
enum
{
  kMacFarsiStandardVariant = 0, /* 0xC0 is 8-spoke asterisk, 0x2A & 0xAA are asterisk
                                   (e.g. Tehran)*/
  kMacFarsiTrueTypeVariant = 1  /* asterisk, multiply signs, Koranic ligatures,
                                   geometric shapes*/
};
/* Variants of kTextEncodingMacHebrew*/
enum
{
  kMacHebrewStandardVariant = 0,
  kMacHebrewFigureSpaceVariant = 1
};
/* Variants of kTextEncodingMacVT100 */
enum
{
  kMacVT100DefaultVariant = 0,      /* meta value, maps to 1 or 2 depending on System */
  kMacVT100CurrencySignVariant = 1, /* Mac OS version < 8.5, 0xDB is CURRENCY SIGN */
  kMacVT100EuroSignVariant = 2      /* Mac OS version >= 8.5, 0xDB is EURO SIGN */
};
/* Variants of Unicode & ISO 10646 encodings*/
enum
{
  kUnicodeNoSubset = 0,
  kUnicodeCanonicalDecompVariant = 2 /* canonical decomposition; excludes composed
                                        characters*/
};
/* Variants of Big-5 encoding*/
enum
{
  kBig5_BasicVariant = 0,
  kBig5_StandardVariant = 1, /* 0xC6A1-0xC7FC: kana, Cyrillic, enclosed numerics*/
  kBig5_ETenVariant = 2      /* adds kana, Cyrillic, radicals, etc with hi bytes
                                C6-C8,F9*/
};
/* Variants of MacRomanLatin1 */
enum
{
  kMacRomanLatin1DefaultVariant = 0,    /* meta value, maps to others depending on
                                           System*/
  kMacRomanLatin1StandardVariant = 2,   /* permuted MacRoman, EuroSignVariant*/
  kMacRomanLatin1TurkishVariant = 6,    /* permuted MacTurkish*/
  kMacRomanLatin1CroatianVariant = 8,   /* permuted MacCroatian, EuroSignVariant*/
  kMacRomanLatin1IcelandicVariant = 11, /* permuted MacIcelandic, StdEuroSignVariant*/
  kMacRomanLatin1RomanianVariant = 14   /* permuted MacRomanian, EuroSignVariant*/
};
/* Unicode variants not yet supported (and not fully defined)*/
enum
{
  kUnicodeNoCompatibilityVariant = 1,
  kUnicodeNoComposedVariant = 3,
  kUnicodeNoCorporateVariant = 4
};
/* The following are older names for backward compatibility*/
enum
{
  kMacRomanStandardVariant = 0,
  kMacIcelandicStandardVariant = 0,
  kMacIcelandicTrueTypeVariant = 1,
  kJapaneseStandardVariant = 0,
  kJapaneseStdNoVerticalsVariant = 1,
  kJapaneseBasicVariant = 2,
  kJapanesePostScriptScrnVariant = 3,
  kJapanesePostScriptPrintVariant = 4,
  kJapaneseVertAtKuPlusTenVariant = 5,
  /* kJapaneseStdNoOneByteKanaVariant = 6, // replaced by
     kJapaneseNoOneByteKanaOption*/
  /* kJapaneseBasicNoOneByteKanaVariant = 7, // replaced by
     kJapaneseNoOneByteKanaOption */
  kHebrewStandardVariant = 0,
  kHebrewFigureSpaceVariant = 1,
  kUnicodeMaxDecomposedVariant = 2, /* replaced by kUnicodeCanonicalDecompVariant*/
  /* The following Japanese variant options were never supported and are now
     deprecated.*/
  /* In TEC 1.4 and later their functionality is replaced by the Unicode
     Converter options listed.*/
  kJapaneseNoOneByteKanaOption = 0x20,    /* replaced by UnicodeConverter option
                                             kUnicodeNoHalfwidthCharsBit*/
  kJapaneseUseAsciiBackslashOption = 0x40 /* replaced by UnicodeConverter option
                                             kUnicodeForceASCIIRangeBit*/
};
490  /* TextEncodingFormat type & values */
491  typedef UInt32 TextEncodingFormat;
/* TextEncodingFormat values. */
enum
{
  /* Default TextEncodingFormat for any TextEncodingBase*/
  kTextEncodingDefaultFormat = 0,
  /* Formats for Unicode & ISO 10646*/
  kUnicode16BitFormat = 0,
  kUnicodeUTF7Format = 1,
  kUnicodeUTF8Format = 2,
  kUnicode32BitFormat = 3
};
502  /* TextEncoding type */
503  typedef UInt32 TextEncoding;
504  /* name part selector for GetTextEncodingName*/
505  typedef UInt32 TextEncodingNameSelector;
/* TextEncodingNameSelector values: which part of the name is requested. */
enum
{
  kTextEncodingFullName = 0,
  kTextEncodingBaseName = 1,
  kTextEncodingVariantName = 2,
  kTextEncodingFormatName = 3
};
514  /* Types used in conversion */
516  {
517  ByteOffset offset;
518  TextEncoding textEncoding;
519  };
520  typedef struct TextEncodingRun TextEncodingRun;
524  {
525  ByteOffset offset;
526  ScriptCode script;
527  };
528  typedef struct ScriptCodeRun ScriptCodeRun;
530  typedef const ScriptCodeRun *ConstScriptCodeRunPtr;
531  typedef UInt8 *TextPtr;
532  typedef const UInt8 *ConstTextPtr;
533  /* Basic types for Unicode characters and strings:*/
534  typedef UniChar *UniCharArrayPtr;
535  typedef const UniChar *ConstUniCharArrayPtr;
540  typedef UniCharArrayPtr *UniCharArrayHandle;
545  typedef UInt32 UniCharArrayOffset;
/* enums for TextEncoding Conversion routines*/
enum
{
  kTextScriptDontCare = -128,
  kTextLanguageDontCare = -128,
  kTextRegionDontCare = -128
};
554  /* struct for TECGetInfo*/
555 
556  struct TECInfo
557  {
558  UInt16 format; /* format code for this struct*/
559  UInt16 tecVersion; /* TEC version in BCD, e.g. 0x0121 for 1.2.1*/
560  UInt32 tecTextConverterFeatures; /* bitmask indicating TEC features/fixes*/
561  UInt32 tecUnicodeConverterFeatures; /* bitmask indicating UnicodeConverter
562  features/fixes*/
563  UInt32
564  tecTextCommonFeatures; /* bitmask indicating TextCommon features/fixes*/
565  Str31 tecTextEncodingsFolderName; /* localized name of Text Encodings folder
566  (pascal string)*/
567  Str31
568  tecExtensionFileName; /* localized name of TEC extension (pascal string)*/
569  UInt16 tecLowestTEFileVersion; /* Lowest version (BCD) of all files in Text
570  Encodings folder*/
571  UInt16 tecHighestTEFileVersion; /* Highest version (BCD) of all files in Text
572  Encodings folder*/
573  };
574  typedef struct TECInfo TECInfo;
575  typedef TECInfo *TECInfoPtr;
576  typedef TECInfoPtr *TECInfoHandle;
/* Value for TECInfo format code*/
enum
{
  kTECInfoCurrentFormat = 2 /* any future formats will just add fields at the end*/
};
/*
 * Bit numbers for TEC feature/fix flags.
 * NOTE(review): presumably these index one of the feature bitmask fields of
 * TECInfo -- confirm which one against the TEC documentation.
 */
enum
{
  kTECKeepInfoFixBit = 0,
  kTECFallbackTextLengthFixBit = 1,
  kTECTextRunBitClearFixBit = 2,
  kTECTextToUnicodeScanFixBit = 3,
  kTECAddForceASCIIChangesBit = 4,
  kTECPreferredEncodingFixBit = 5,
  kTECAddTextRunHeuristicsBit = 6,
  kTECAddFallbackInterruptBit = 7
};

/* Masks derived from the bit numbers above (1 shifted left by the bit number). */
enum
{
  kTECKeepInfoFixMask = 1L << kTECKeepInfoFixBit,
  kTECFallbackTextLengthFixMask = 1L << kTECFallbackTextLengthFixBit,
  kTECTextRunBitClearFixMask = 1L << kTECTextRunBitClearFixBit,
  kTECTextToUnicodeScanFixMask = 1L << kTECTextToUnicodeScanFixBit,
  kTECAddForceASCIIChangesMask = 1L << kTECAddForceASCIIChangesBit,
  kTECPreferredEncodingFixMask = 1L << kTECPreferredEncodingFixBit,
  kTECAddTextRunHeuristicsMask = 1L << kTECAddTextRunHeuristicsBit,
  kTECAddFallbackInterruptMask = 1L << kTECAddFallbackInterruptBit
};
/* Special Unicode code values. */
enum
{
  kUnicodeByteOrderMark = 0xFEFF,
  kUnicodeObjectReplacement = 0xFFFC,    /* placeholder for non-text object*/
  kUnicodeReplacementChar = 0xFFFD,      /* Unicode replacement for unconvertable input
                                            char*/
  kUnicodeSwappedByteOrderMark = 0xFFFE, /* not a Unicode char; byte-swapped version of
                                            FEFF*/
  kUnicodeNotAChar = 0xFFFF              /* not a Unicode char; may be used as a
                                            terminator*/
};
672  typedef SInt32 UCCharPropertyType;
/* UCCharPropertyType values: which character property is requested. */
enum
{
  kUCCharPropTypeGenlCategory = 1,   /* requests enumeration value*/
  kUCCharPropTypeCombiningClass = 2, /* requests numeric value 0..255*/
  kUCCharPropTypeBidiCategory = 3    /* requests enumeration value*/
};
680  typedef UInt32 UCCharPropertyValue;
/* General Category enumeration values (requested by
 * kUCCharPropTypeGenlCategory)*/
enum
{
  /* Normative categories:*/
  kUCGenlCatOtherNotAssigned = 0,     /* Cn Other, Not Assigned*/
  kUCGenlCatOtherControl = 1,         /* Cc Other, Control*/
  kUCGenlCatOtherFormat = 2,          /* Cf Other, Format*/
  kUCGenlCatOtherSurrogate = 3,       /* Cs Other, Surrogate*/
  kUCGenlCatOtherPrivateUse = 4,      /* Co Other, Private Use*/
  kUCGenlCatMarkNonSpacing = 5,       /* Mn Mark, Non-Spacing*/
  kUCGenlCatMarkSpacingCombining = 6, /* Mc Mark, Spacing Combining*/
  kUCGenlCatMarkEnclosing = 7,        /* Me Mark, Enclosing*/
  kUCGenlCatNumberDecimalDigit = 8,   /* Nd Number, Decimal Digit*/
  kUCGenlCatNumberLetter = 9,         /* Nl Number, Letter*/
  kUCGenlCatNumberOther = 10,         /* No Number, Other*/
  kUCGenlCatSeparatorSpace = 11,      /* Zs Separator, Space*/
  kUCGenlCatSeparatorLine = 12,       /* Zl Separator, Line*/
  kUCGenlCatSeparatorParagraph = 13,  /* Zp Separator, Paragraph*/
  kUCGenlCatLetterUppercase = 14,     /* Lu Letter, Uppercase*/
  kUCGenlCatLetterLowercase = 15,     /* Ll Letter, Lowercase*/
  kUCGenlCatLetterTitlecase = 16,     /* Lt Letter, Titlecase*/
  /* Informative categories:*/
  kUCGenlCatLetterModifier = 17,      /* Lm Letter, Modifier*/
  kUCGenlCatLetterOther = 18,         /* Lo Letter, Other*/
  kUCGenlCatPunctConnector = 20,      /* Pc Punctuation, Connector*/
  kUCGenlCatPunctDash = 21,           /* Pd Punctuation, Dash*/
  kUCGenlCatPunctOpen = 22,           /* Ps Punctuation, Open*/
  kUCGenlCatPunctClose = 23,          /* Pe Punctuation, Close*/
  kUCGenlCatPunctInitialQuote = 24,   /* Pi Punctuation, Initial quote*/
  kUCGenlCatPunctFinalQuote = 25,     /* Pf Punctuation, Final quote*/
  kUCGenlCatPunctOther = 26,          /* Po Punctuation, Other*/
  kUCGenlCatSymbolMath = 28,          /* Sm Symbol, Math*/
  kUCGenlCatSymbolCurrency = 29,      /* Sc Symbol, Currency*/
  kUCGenlCatSymbolModifier = 30,      /* Sk Symbol, Modifier*/
  kUCGenlCatSymbolOther = 31          /* So Symbol, Other*/
};
/* Bidirectional Category enumeration values (requested by
 * kUCCharPropTypeBidiCategory)*/
enum
{
  kUCBidiCatNotApplicable = 0,         /* for now use this for unassigned*/
  /* Strong types:*/
  kUCBidiCatLeftRight = 1,             /* L Left-to-Right*/
  kUCBidiCatRightLeft = 2,             /* R Right-to-Left*/
  /* Weak types:*/
  kUCBidiCatEuroNumber = 3,            /* EN European Number*/
  kUCBidiCatEuroNumberSeparator = 4,   /* ES European Number Separator*/
  kUCBidiCatEuroNumberTerminator = 5,  /* ET European Number Terminator*/
  kUCBidiCatArabicNumber = 6,          /* AN Arabic Number*/
  kUCBidiCatCommonNumberSeparator = 7, /* CS Common Number Separator*/
  /* Separators:*/
  kUCBidiCatBlockSeparator = 8,        /* B Paragraph Separator (was Block Separator)*/
  kUCBidiCatSegmentSeparator = 9,      /* S Segment Separator*/
  /* Neutrals:*/
  kUCBidiCatWhitespace = 10,           /* WS Whitespace*/
  kUCBidiCatOtherNeutral = 11,         /* ON Other Neutrals (unassigned codes could use
                                          this)*/
  /* New categories for Unicode 3.0*/
  kUCBidiCatRightLeftArabic = 12,      /* AL Right-to-Left Arabic (was Arabic Letter)*/
  kUCBidiCatLeftRightEmbedding = 13,   /* LRE Left-to-Right Embedding*/
  kUCBidiCatRightLeftEmbedding = 14,   /* RLE Right-to-Left Embedding*/
  kUCBidiCatLeftRightOverride = 15,    /* LRO Left-to-Right Override*/
  kUCBidiCatRightLeftOverride = 16,    /* RLO Right-to-Left Override*/
  kUCBidiCatPopDirectionalFormat = 17, /* PDF Pop Directional Format*/
  kUCBidiCatNonSpacingMark = 18,       /* NSM Non-Spacing Mark*/
  kUCBidiCatBoundaryNeutral = 19       /* BN Boundary Neutral*/
};
767  TextEncoding
768  CreateTextEncoding(TextEncodingBase encodingBase,
769  TextEncodingVariant encodingVariant,
770  TextEncodingFormat encodingFormat);
771 
780  TextEncodingBase
781  GetTextEncodingBase(TextEncoding encoding);
782 
791  TextEncodingVariant
792  GetTextEncodingVariant(TextEncoding encoding);
793 
802  TextEncodingFormat
803  GetTextEncodingFormat(TextEncoding encoding);
804 
813  TextEncoding
814  ResolveDefaultTextEncoding(TextEncoding encoding);
815 
824  OSStatus
825  GetTextEncodingName(TextEncoding iEncoding,
826  TextEncodingNameSelector iNamePartSelector,
827  RegionCode iPreferredRegion,
828  TextEncoding iPreferredEncoding, ByteCount iOutputBufLen,
829  ByteCount *oNameLength,
830  RegionCode *oActualRegion, /* can be NULL */
831  TextEncoding *oActualEncoding, /* can be NULL */
832  TextPtr oEncodingName);
833 
842  OSStatus
844 
853  OSStatus
854  UpgradeScriptInfoToTextEncoding(ScriptCode iTextScriptID,
855  LangCode iTextLanguageID, RegionCode iRegionID,
856  ConstStr255Param iTextFontname,
857  TextEncoding *oEncoding);
858 
867  OSStatus
868  RevertTextEncodingToScriptInfo(TextEncoding iEncoding,
869  ScriptCode *oTextScriptID,
870  LangCode *oTextLanguageID, /* can be NULL */
871  Str255 oTextFontname); /* can be NULL */
872 
881  OSStatus
882  NearestMacTextEncodings(TextEncoding generalEncoding,
883  TextEncoding *bestMacEncoding,
884  TextEncoding *alternateMacEncoding);
885 
894  OSStatus
895  UCGetCharProperty(const UniChar *charPtr, UniCharCount textLength,
896  UCCharPropertyType propType, UCCharPropertyValue *propValue);
897 
898 #if PRAGMA_STRUCT_ALIGN
899 #pragma options align = reset
900 #elif PRAGMA_STRUCT_PACKPUSH
901 #pragma pack(pop)
902 #elif PRAGMA_STRUCT_PACK
903 #pragma pack()
904 #endif
905 
906 #ifdef PRAGMA_IMPORT_OFF
907 #pragma import off
908 #elif PRAGMA_IMPORT
909 #pragma import reset
910 #endif
911 
912 #ifdef __cplusplus
913 }
914 #endif
915 
916 #endif /* __TEXTCOMMON__ */
/*
 * Cross-reference index carried over from the documentation listing.
 *
 * MacTypes.h -- Basic Macintosh data types.
 * OSStatus TECGetInfo(TECInfoHandle *tecInfo)
 * TextEncoding ResolveDefaultTextEncoding(TextEncoding encoding)
 * OSStatus GetTextEncodingName(TextEncoding iEncoding, TextEncodingNameSelector iNamePartSelector, RegionCode iPreferredRegion, TextEncoding iPreferredEncoding, ByteCount iOutputBufLen, ByteCount *oNameLength, RegionCode *oActualRegion, TextEncoding *oActualEncoding, TextPtr oEncodingName)
 * OSStatus UCGetCharProperty(const UniChar *charPtr, UniCharCount textLength, UCCharPropertyType propType, UCCharPropertyValue *propValue)
 * TextEncodingFormat GetTextEncodingFormat(TextEncoding encoding)
 * UInt32 UniCharArrayOffset
 *   Definition: TextCommon.h:545
 * TextEncoding CreateTextEncoding(TextEncodingBase encodingBase, TextEncodingVariant encodingVariant, TextEncodingFormat encodingFormat)
 * UniCharArrayPtr * UniCharArrayHandle
 *   Definition: TextCommon.h:540
 * OSStatus NearestMacTextEncodings(TextEncoding generalEncoding, TextEncoding *bestMacEncoding, TextEncoding *alternateMacEncoding)
 * TextEncodingBase GetTextEncodingBase(TextEncoding encoding)
 * SInt32 UCCharPropertyType
 *   Definition: TextCommon.h:672
 * TextEncodingVariant GetTextEncodingVariant(TextEncoding encoding)
 * OSStatus UpgradeScriptInfoToTextEncoding(ScriptCode iTextScriptID, LangCode iTextLanguageID, RegionCode iRegionID, ConstStr255Param iTextFontname, TextEncoding *oEncoding)
 * OSStatus RevertTextEncodingToScriptInfo(TextEncoding iEncoding, ScriptCode *oTextScriptID, LangCode *oTextLanguageID, Str255 oTextFontname)
 * struct ScriptCodeRun
 *   Definition: TextCommon.h:524
 * struct TECInfo
 *   Definition: TextCommon.h:557
 * struct TextEncodingRun
 *   Definition: TextCommon.h:516
 */