00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #include "utf8.h"
00011 #include <cctype>
00012 #include "util.h"
00013
// Normalization table for code points U+0000..U+00FF, indexed by the low
// byte of the code point (see UTF8Normalize).  Each entry is the value the
// character is replaced with before comparison: ASCII upper case folds to
// lower case, most accented Latin-1 letters fold to their unaccented lower
// case base letter, and several spacing marks (U+00A0, U+00A8, U+00AF,
// U+00B4, U+00B8) fold to a plain space.
// NOTE(review): U+00C6, U+00D0, U+00D8 and U+00DE map to themselves rather
// than to their lower case forms - confirm that this is intended.
unsigned int WGL4_Normalization_00[256] = {
    0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
    0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
    0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
    0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
    0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
    0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
    0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
    0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
    // 0x40: upper case ASCII letters fold to lower case
    0x0040, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
    0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
    0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
    0x0078, 0x0079, 0x007a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
    0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
    0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
    0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
    0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    // 0xa0: Latin-1 punctuation and symbols
    0x0020, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
    0x0020, 0x00a9, 0x0061, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x0020,
    0x00b0, 0x00b1, 0x0032, 0x0033, 0x0020, 0x03bc, 0x00b6, 0x00b7,
    0x0020, 0x0031, 0x006f, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
    // 0xc0: accented upper case letters
    0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x00c6, 0x0063,
    0x0065, 0x0065, 0x0065, 0x0065, 0x0069, 0x0069, 0x0069, 0x0069,
    0x00d0, 0x006e, 0x006f, 0x006f, 0x006f, 0x006f, 0x006f, 0x00d7,
    0x00d8, 0x0075, 0x0075, 0x0075, 0x0075, 0x0079, 0x00de, 0x00df,
    // 0xe0: accented lower case letters
    0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x00e6, 0x0063,
    0x0065, 0x0065, 0x0065, 0x0065, 0x0069, 0x0069, 0x0069, 0x0069,
    0x00f0, 0x006e, 0x006f, 0x006f, 0x006f, 0x006f, 0x006f, 0x00f7,
    0x00f8, 0x0075, 0x0075, 0x0075, 0x0075, 0x0079, 0x00fe, 0x0079,
};
00048
// Normalization table for code points U+0100..U+01FF, indexed by the low
// byte of the code point.  In the first half most accented Latin letters
// fold to a plain lower case ASCII letter, and a few upper case letters
// fold to their own lower case form (for example U+0110 -> U+0111).
// U+0180..U+01F9 map to themselves; the last six entries fold to U+00E5,
// U+00E6 and U+00F8.
unsigned int WGL4_Normalization_01[256] = {
    0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0063, 0x0063,
    0x0063, 0x0063, 0x0063, 0x0063, 0x0063, 0x0063, 0x0064, 0x0064,
    0x0111, 0x0111, 0x0065, 0x0065, 0x0065, 0x0065, 0x0065, 0x0065,
    0x0065, 0x0065, 0x0065, 0x0065, 0x0067, 0x0067, 0x0067, 0x0067,
    0x0067, 0x0067, 0x0067, 0x0067, 0x0068, 0x0068, 0x0127, 0x0127,
    0x0069, 0x0069, 0x0069, 0x0069, 0x0069, 0x0069, 0x0069, 0x0069,
    0x0069, 0x0131, 0x0069, 0x0069, 0x006a, 0x006a, 0x006b, 0x006b,
    0x0138, 0x006c, 0x006c, 0x006c, 0x006c, 0x006c, 0x006c, 0x006c,
    0x006c, 0x0142, 0x0142, 0x006e, 0x006e, 0x006e, 0x006e, 0x006e,
    0x006e, 0x006e, 0x014a, 0x014b, 0x006f, 0x006f, 0x006f, 0x006f,
    0x006f, 0x006f, 0x0153, 0x0153, 0x0072, 0x0072, 0x0072, 0x0072,
    0x0072, 0x0072, 0x0073, 0x0073, 0x0073, 0x0073, 0x0073, 0x0073,
    0x0073, 0x0073, 0x0074, 0x0074, 0x0074, 0x0074, 0x0167, 0x0167,
    0x0075, 0x0075, 0x0075, 0x0075, 0x0075, 0x0075, 0x0075, 0x0075,
    0x0075, 0x0075, 0x0075, 0x0075, 0x0077, 0x0077, 0x0079, 0x0079,
    0x0079, 0x007a, 0x007a, 0x007a, 0x007a, 0x007a, 0x007a, 0x0073,
    // 0x80: identity mapping up to 0xf9
    0x0180, 0x0181, 0x0182, 0x0183, 0x0184, 0x0185, 0x0186, 0x0187,
    0x0188, 0x0189, 0x018a, 0x018b, 0x018c, 0x018d, 0x018e, 0x018f,
    0x0190, 0x0191, 0x0192, 0x0193, 0x0194, 0x0195, 0x0196, 0x0197,
    0x0198, 0x0199, 0x019a, 0x019b, 0x019c, 0x019d, 0x019e, 0x019f,
    0x01a0, 0x01a1, 0x01a2, 0x01a3, 0x01a4, 0x01a5, 0x01a6, 0x01a7,
    0x01a8, 0x01a9, 0x01aa, 0x01ab, 0x01ac, 0x01ad, 0x01ae, 0x01af,
    0x01b0, 0x01b1, 0x01b2, 0x01b3, 0x01b4, 0x01b5, 0x01b6, 0x01b7,
    0x01b8, 0x01b9, 0x01ba, 0x01bb, 0x01bc, 0x01bd, 0x01be, 0x01bf,
    0x01c0, 0x01c1, 0x01c2, 0x01c3, 0x01c4, 0x01c5, 0x01c6, 0x01c7,
    0x01c8, 0x01c9, 0x01ca, 0x01cb, 0x01cc, 0x01cd, 0x01ce, 0x01cf,
    0x01d0, 0x01d1, 0x01d2, 0x01d3, 0x01d4, 0x01d5, 0x01d6, 0x01d7,
    0x01d8, 0x01d9, 0x01da, 0x01db, 0x01dc, 0x01dd, 0x01de, 0x01df,
    0x01e0, 0x01e1, 0x01e2, 0x01e3, 0x01e4, 0x01e5, 0x01e6, 0x01e7,
    0x01e8, 0x01e9, 0x01ea, 0x01eb, 0x01ec, 0x01ed, 0x01ee, 0x01ef,
    0x01f0, 0x01f1, 0x01f2, 0x01f3, 0x01f4, 0x01f5, 0x01f6, 0x01f7,
    0x01f8, 0x01f9, 0x00e5, 0x00e5, 0x00e6, 0x00e6, 0x00f8, 0x00f8,
};
00083
// Normalization table for code points U+0200..U+02FF, indexed by the low
// byte of the code point.  Every entry maps to itself except U+02D8..U+02DB
// and U+02DD, which are replaced by a plain space.
unsigned int WGL4_Normalization_02[256] = {
    0x0200, 0x0201, 0x0202, 0x0203, 0x0204, 0x0205, 0x0206, 0x0207,
    0x0208, 0x0209, 0x020a, 0x020b, 0x020c, 0x020d, 0x020e, 0x020f,
    0x0210, 0x0211, 0x0212, 0x0213, 0x0214, 0x0215, 0x0216, 0x0217,
    0x0218, 0x0219, 0x021a, 0x021b, 0x021c, 0x021d, 0x021e, 0x021f,
    0x0220, 0x0221, 0x0222, 0x0223, 0x0224, 0x0225, 0x0226, 0x0227,
    0x0228, 0x0229, 0x022a, 0x022b, 0x022c, 0x022d, 0x022e, 0x022f,
    0x0230, 0x0231, 0x0232, 0x0233, 0x0234, 0x0235, 0x0236, 0x0237,
    0x0238, 0x0239, 0x023a, 0x023b, 0x023c, 0x023d, 0x023e, 0x023f,
    0x0240, 0x0241, 0x0242, 0x0243, 0x0244, 0x0245, 0x0246, 0x0247,
    0x0248, 0x0249, 0x024a, 0x024b, 0x024c, 0x024d, 0x024e, 0x024f,
    0x0250, 0x0251, 0x0252, 0x0253, 0x0254, 0x0255, 0x0256, 0x0257,
    0x0258, 0x0259, 0x025a, 0x025b, 0x025c, 0x025d, 0x025e, 0x025f,
    0x0260, 0x0261, 0x0262, 0x0263, 0x0264, 0x0265, 0x0266, 0x0267,
    0x0268, 0x0269, 0x026a, 0x026b, 0x026c, 0x026d, 0x026e, 0x026f,
    0x0270, 0x0271, 0x0272, 0x0273, 0x0274, 0x0275, 0x0276, 0x0277,
    0x0278, 0x0279, 0x027a, 0x027b, 0x027c, 0x027d, 0x027e, 0x027f,
    0x0280, 0x0281, 0x0282, 0x0283, 0x0284, 0x0285, 0x0286, 0x0287,
    0x0288, 0x0289, 0x028a, 0x028b, 0x028c, 0x028d, 0x028e, 0x028f,
    0x0290, 0x0291, 0x0292, 0x0293, 0x0294, 0x0295, 0x0296, 0x0297,
    0x0298, 0x0299, 0x029a, 0x029b, 0x029c, 0x029d, 0x029e, 0x029f,
    0x02a0, 0x02a1, 0x02a2, 0x02a3, 0x02a4, 0x02a5, 0x02a6, 0x02a7,
    0x02a8, 0x02a9, 0x02aa, 0x02ab, 0x02ac, 0x02ad, 0x02ae, 0x02af,
    0x02b0, 0x02b1, 0x02b2, 0x02b3, 0x02b4, 0x02b5, 0x02b6, 0x02b7,
    0x02b8, 0x02b9, 0x02ba, 0x02bb, 0x02bc, 0x02bd, 0x02be, 0x02bf,
    0x02c0, 0x02c1, 0x02c2, 0x02c3, 0x02c4, 0x02c5, 0x02c6, 0x02c7,
    0x02c8, 0x02c9, 0x02ca, 0x02cb, 0x02cc, 0x02cd, 0x02ce, 0x02cf,
    0x02d0, 0x02d1, 0x02d2, 0x02d3, 0x02d4, 0x02d5, 0x02d6, 0x02d7,
    // 0xd8: the only non-identity row in this page
    0x0020, 0x0020, 0x0020, 0x0020, 0x02dc, 0x0020, 0x02de, 0x02df,
    0x02e0, 0x02e1, 0x02e2, 0x02e3, 0x02e4, 0x02e5, 0x02e6, 0x02e7,
    0x02e8, 0x02e9, 0x02ea, 0x02eb, 0x02ec, 0x02ed, 0x02ee, 0x02ef,
    0x02f0, 0x02f1, 0x02f2, 0x02f3, 0x02f4, 0x02f5, 0x02f6, 0x02f7,
    0x02f8, 0x02f9, 0x02fa, 0x02fb, 0x02fc, 0x02fd, 0x02fe, 0x02ff,
};
00118
// Normalization table for code points U+0300..U+03FF, indexed by the low
// byte of the code point.  U+0300..U+0383 map to themselves.  In the range
// U+0384..U+039F the Greek capitals (plain and accented) fold to plain lower
// case Greek letters, U+0384 becomes a space, U+0385 becomes U+00A8 and
// U+0387 becomes U+00B7.  Everything from U+03A0 upwards maps to itself.
// NOTE(review): the capitals U+03A0..U+03A9 are not folded to lower case
// even though U+0391..U+039F are - confirm whether that is intended.
unsigned int WGL4_Normalization_03[256] = {
    0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 0x0307,
    0x0308, 0x0309, 0x030a, 0x030b, 0x030c, 0x030d, 0x030e, 0x030f,
    0x0310, 0x0311, 0x0312, 0x0313, 0x0314, 0x0315, 0x0316, 0x0317,
    0x0318, 0x0319, 0x031a, 0x031b, 0x031c, 0x031d, 0x031e, 0x031f,
    0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326, 0x0327,
    0x0328, 0x0329, 0x032a, 0x032b, 0x032c, 0x032d, 0x032e, 0x032f,
    0x0330, 0x0331, 0x0332, 0x0333, 0x0334, 0x0335, 0x0336, 0x0337,
    0x0338, 0x0339, 0x033a, 0x033b, 0x033c, 0x033d, 0x033e, 0x033f,
    0x0340, 0x0341, 0x0342, 0x0343, 0x0344, 0x0345, 0x0346, 0x0347,
    0x0348, 0x0349, 0x034a, 0x034b, 0x034c, 0x034d, 0x034e, 0x034f,
    0x0350, 0x0351, 0x0352, 0x0353, 0x0354, 0x0355, 0x0356, 0x0357,
    0x0358, 0x0359, 0x035a, 0x035b, 0x035c, 0x035d, 0x035e, 0x035f,
    0x0360, 0x0361, 0x0362, 0x0363, 0x0364, 0x0365, 0x0366, 0x0367,
    0x0368, 0x0369, 0x036a, 0x036b, 0x036c, 0x036d, 0x036e, 0x036f,
    0x0370, 0x0371, 0x0372, 0x0373, 0x0374, 0x0375, 0x0376, 0x0377,
    0x0378, 0x0379, 0x037a, 0x037b, 0x037c, 0x037d, 0x037e, 0x037f,
    // 0x80: accented and plain Greek capitals
    0x0380, 0x0381, 0x0382, 0x0383, 0x0020, 0x00a8, 0x03b1, 0x00b7,
    0x03b5, 0x03b7, 0x03b9, 0x038b, 0x03bf, 0x038d, 0x03c5, 0x03c9,
    0x03ca, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
    0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
    // 0xa0: identity mapping from here to the end of the page
    0x03a0, 0x03a1, 0x03a2, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
    0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
    0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
    0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
    0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
    0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x03cf,
    0x03d0, 0x03d1, 0x03d2, 0x03d3, 0x03d4, 0x03d5, 0x03d6, 0x03d7,
    0x03d8, 0x03d9, 0x03da, 0x03db, 0x03dc, 0x03dd, 0x03de, 0x03df,
    0x03e0, 0x03e1, 0x03e2, 0x03e3, 0x03e4, 0x03e5, 0x03e6, 0x03e7,
    0x03e8, 0x03e9, 0x03ea, 0x03eb, 0x03ec, 0x03ed, 0x03ee, 0x03ef,
    0x03f0, 0x03f1, 0x03f2, 0x03f3, 0x03f4, 0x03f5, 0x03f6, 0x03f7,
    0x03f8, 0x03f9, 0x03fa, 0x03fb, 0x03fc, 0x03fd, 0x03fe, 0x03ff,
};
00153
// Normalization table for code points U+0400..U+04FF, indexed by the low
// byte of the code point.  Cyrillic capitals U+0410..U+042F fold to the
// lower case letters U+0430..U+044F; several letters additionally fold to a
// related base letter (for example U+0401/U+0451 -> U+0435 and
// U+0419/U+0439 -> U+0438).  From U+0460 upwards entries map to themselves,
// except U+0490 which folds to U+0491.
unsigned int WGL4_Normalization_04[256] = {
    0x0400, 0x0435, 0x0452, 0x0433, 0x0454, 0x0455, 0x0456, 0x0456,
    0x0458, 0x0459, 0x045a, 0x045b, 0x043a, 0x040d, 0x0443, 0x045f,
    // 0x10: capital letters
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
    0x0438, 0x0438, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
    // 0x30: lower case letters
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
    0x0438, 0x0438, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
    0x0450, 0x0435, 0x0452, 0x0433, 0x0454, 0x0455, 0x0456, 0x0456,
    0x0458, 0x0459, 0x045a, 0x045b, 0x043a, 0x045d, 0x0443, 0x045f,
    // 0x60: identity mapping (apart from 0x90)
    0x0460, 0x0461, 0x0462, 0x0463, 0x0464, 0x0465, 0x0466, 0x0467,
    0x0468, 0x0469, 0x046a, 0x046b, 0x046c, 0x046d, 0x046e, 0x046f,
    0x0470, 0x0471, 0x0472, 0x0473, 0x0474, 0x0475, 0x0476, 0x0477,
    0x0478, 0x0479, 0x047a, 0x047b, 0x047c, 0x047d, 0x047e, 0x047f,
    0x0480, 0x0481, 0x0482, 0x0483, 0x0484, 0x0485, 0x0486, 0x0487,
    0x0488, 0x0489, 0x048a, 0x048b, 0x048c, 0x048d, 0x048e, 0x048f,
    0x0491, 0x0491, 0x0492, 0x0493, 0x0494, 0x0495, 0x0496, 0x0497,
    0x0498, 0x0499, 0x049a, 0x049b, 0x049c, 0x049d, 0x049e, 0x049f,
    0x04a0, 0x04a1, 0x04a2, 0x04a3, 0x04a4, 0x04a5, 0x04a6, 0x04a7,
    0x04a8, 0x04a9, 0x04aa, 0x04ab, 0x04ac, 0x04ad, 0x04ae, 0x04af,
    0x04b0, 0x04b1, 0x04b2, 0x04b3, 0x04b4, 0x04b5, 0x04b6, 0x04b7,
    0x04b8, 0x04b9, 0x04ba, 0x04bb, 0x04bc, 0x04bd, 0x04be, 0x04bf,
    0x04c0, 0x04c1, 0x04c2, 0x04c3, 0x04c4, 0x04c5, 0x04c6, 0x04c7,
    0x04c8, 0x04c9, 0x04ca, 0x04cb, 0x04cc, 0x04cd, 0x04ce, 0x04cf,
    0x04d0, 0x04d1, 0x04d2, 0x04d3, 0x04d4, 0x04d5, 0x04d6, 0x04d7,
    0x04d8, 0x04d9, 0x04da, 0x04db, 0x04dc, 0x04dd, 0x04de, 0x04df,
    0x04e0, 0x04e1, 0x04e2, 0x04e3, 0x04e4, 0x04e5, 0x04e6, 0x04e7,
    0x04e8, 0x04e9, 0x04ea, 0x04eb, 0x04ec, 0x04ed, 0x04ee, 0x04ef,
    0x04f0, 0x04f1, 0x04f2, 0x04f3, 0x04f4, 0x04f5, 0x04f6, 0x04f7,
    0x04f8, 0x04f9, 0x04fa, 0x04fb, 0x04fc, 0x04fd, 0x04fe, 0x04ff,
};
00188
// Normalization table for code points U+1E00..U+1EFF, indexed by the low
// byte of the code point.  Every entry maps to itself except
// U+1E80..U+1E85, which fold to 'w' (0x0077), and U+1EF2/U+1EF3, which fold
// to 'y' (0x0079).
unsigned int WGL4_Normalization_1e[256] = {
    0x1e00, 0x1e01, 0x1e02, 0x1e03, 0x1e04, 0x1e05, 0x1e06, 0x1e07,
    0x1e08, 0x1e09, 0x1e0a, 0x1e0b, 0x1e0c, 0x1e0d, 0x1e0e, 0x1e0f,
    0x1e10, 0x1e11, 0x1e12, 0x1e13, 0x1e14, 0x1e15, 0x1e16, 0x1e17,
    0x1e18, 0x1e19, 0x1e1a, 0x1e1b, 0x1e1c, 0x1e1d, 0x1e1e, 0x1e1f,
    0x1e20, 0x1e21, 0x1e22, 0x1e23, 0x1e24, 0x1e25, 0x1e26, 0x1e27,
    0x1e28, 0x1e29, 0x1e2a, 0x1e2b, 0x1e2c, 0x1e2d, 0x1e2e, 0x1e2f,
    0x1e30, 0x1e31, 0x1e32, 0x1e33, 0x1e34, 0x1e35, 0x1e36, 0x1e37,
    0x1e38, 0x1e39, 0x1e3a, 0x1e3b, 0x1e3c, 0x1e3d, 0x1e3e, 0x1e3f,
    0x1e40, 0x1e41, 0x1e42, 0x1e43, 0x1e44, 0x1e45, 0x1e46, 0x1e47,
    0x1e48, 0x1e49, 0x1e4a, 0x1e4b, 0x1e4c, 0x1e4d, 0x1e4e, 0x1e4f,
    0x1e50, 0x1e51, 0x1e52, 0x1e53, 0x1e54, 0x1e55, 0x1e56, 0x1e57,
    0x1e58, 0x1e59, 0x1e5a, 0x1e5b, 0x1e5c, 0x1e5d, 0x1e5e, 0x1e5f,
    0x1e60, 0x1e61, 0x1e62, 0x1e63, 0x1e64, 0x1e65, 0x1e66, 0x1e67,
    0x1e68, 0x1e69, 0x1e6a, 0x1e6b, 0x1e6c, 0x1e6d, 0x1e6e, 0x1e6f,
    0x1e70, 0x1e71, 0x1e72, 0x1e73, 0x1e74, 0x1e75, 0x1e76, 0x1e77,
    0x1e78, 0x1e79, 0x1e7a, 0x1e7b, 0x1e7c, 0x1e7d, 0x1e7e, 0x1e7f,
    // 0x80: accented W/w fold to 'w'
    0x0077, 0x0077, 0x0077, 0x0077, 0x0077, 0x0077, 0x1e86, 0x1e87,
    0x1e88, 0x1e89, 0x1e8a, 0x1e8b, 0x1e8c, 0x1e8d, 0x1e8e, 0x1e8f,
    0x1e90, 0x1e91, 0x1e92, 0x1e93, 0x1e94, 0x1e95, 0x1e96, 0x1e97,
    0x1e98, 0x1e99, 0x1e9a, 0x1e9b, 0x1e9c, 0x1e9d, 0x1e9e, 0x1e9f,
    0x1ea0, 0x1ea1, 0x1ea2, 0x1ea3, 0x1ea4, 0x1ea5, 0x1ea6, 0x1ea7,
    0x1ea8, 0x1ea9, 0x1eaa, 0x1eab, 0x1eac, 0x1ead, 0x1eae, 0x1eaf,
    0x1eb0, 0x1eb1, 0x1eb2, 0x1eb3, 0x1eb4, 0x1eb5, 0x1eb6, 0x1eb7,
    0x1eb8, 0x1eb9, 0x1eba, 0x1ebb, 0x1ebc, 0x1ebd, 0x1ebe, 0x1ebf,
    0x1ec0, 0x1ec1, 0x1ec2, 0x1ec3, 0x1ec4, 0x1ec5, 0x1ec6, 0x1ec7,
    0x1ec8, 0x1ec9, 0x1eca, 0x1ecb, 0x1ecc, 0x1ecd, 0x1ece, 0x1ecf,
    0x1ed0, 0x1ed1, 0x1ed2, 0x1ed3, 0x1ed4, 0x1ed5, 0x1ed6, 0x1ed7,
    0x1ed8, 0x1ed9, 0x1eda, 0x1edb, 0x1edc, 0x1edd, 0x1ede, 0x1edf,
    0x1ee0, 0x1ee1, 0x1ee2, 0x1ee3, 0x1ee4, 0x1ee5, 0x1ee6, 0x1ee7,
    0x1ee8, 0x1ee9, 0x1eea, 0x1eeb, 0x1eec, 0x1eed, 0x1eee, 0x1eef,
    // 0xf0: Y/y with grave fold to 'y'
    0x1ef0, 0x1ef1, 0x0079, 0x0079, 0x1ef4, 0x1ef5, 0x1ef6, 0x1ef7,
    0x1ef8, 0x1ef9, 0x1efa, 0x1efb, 0x1efc, 0x1efd, 0x1efe, 0x1eff,
};
00223
// Normalization table for code points U+2100..U+21FF, indexed by the low
// byte of the code point.  Every entry maps to itself except U+2113, which
// folds to 'l' (0x006c), and U+2126, which folds to U+03C9.
unsigned int WGL4_Normalization_21[256] = {
    0x2100, 0x2101, 0x2102, 0x2103, 0x2104, 0x2105, 0x2106, 0x2107,
    0x2108, 0x2109, 0x210a, 0x210b, 0x210c, 0x210d, 0x210e, 0x210f,
    0x2110, 0x2111, 0x2112, 0x006c, 0x2114, 0x2115, 0x2116, 0x2117,
    0x2118, 0x2119, 0x211a, 0x211b, 0x211c, 0x211d, 0x211e, 0x211f,
    0x2120, 0x2121, 0x2122, 0x2123, 0x2124, 0x2125, 0x03c9, 0x2127,
    0x2128, 0x2129, 0x212a, 0x212b, 0x212c, 0x212d, 0x212e, 0x212f,
    0x2130, 0x2131, 0x2132, 0x2133, 0x2134, 0x2135, 0x2136, 0x2137,
    0x2138, 0x2139, 0x213a, 0x213b, 0x213c, 0x213d, 0x213e, 0x213f,
    0x2140, 0x2141, 0x2142, 0x2143, 0x2144, 0x2145, 0x2146, 0x2147,
    0x2148, 0x2149, 0x214a, 0x214b, 0x214c, 0x214d, 0x214e, 0x214f,
    0x2150, 0x2151, 0x2152, 0x2153, 0x2154, 0x2155, 0x2156, 0x2157,
    0x2158, 0x2159, 0x215a, 0x215b, 0x215c, 0x215d, 0x215e, 0x215f,
    0x2160, 0x2161, 0x2162, 0x2163, 0x2164, 0x2165, 0x2166, 0x2167,
    0x2168, 0x2169, 0x216a, 0x216b, 0x216c, 0x216d, 0x216e, 0x216f,
    0x2170, 0x2171, 0x2172, 0x2173, 0x2174, 0x2175, 0x2176, 0x2177,
    0x2178, 0x2179, 0x217a, 0x217b, 0x217c, 0x217d, 0x217e, 0x217f,
    0x2180, 0x2181, 0x2182, 0x2183, 0x2184, 0x2185, 0x2186, 0x2187,
    0x2188, 0x2189, 0x218a, 0x218b, 0x218c, 0x218d, 0x218e, 0x218f,
    0x2190, 0x2191, 0x2192, 0x2193, 0x2194, 0x2195, 0x2196, 0x2197,
    0x2198, 0x2199, 0x219a, 0x219b, 0x219c, 0x219d, 0x219e, 0x219f,
    0x21a0, 0x21a1, 0x21a2, 0x21a3, 0x21a4, 0x21a5, 0x21a6, 0x21a7,
    0x21a8, 0x21a9, 0x21aa, 0x21ab, 0x21ac, 0x21ad, 0x21ae, 0x21af,
    0x21b0, 0x21b1, 0x21b2, 0x21b3, 0x21b4, 0x21b5, 0x21b6, 0x21b7,
    0x21b8, 0x21b9, 0x21ba, 0x21bb, 0x21bc, 0x21bd, 0x21be, 0x21bf,
    0x21c0, 0x21c1, 0x21c2, 0x21c3, 0x21c4, 0x21c5, 0x21c6, 0x21c7,
    0x21c8, 0x21c9, 0x21ca, 0x21cb, 0x21cc, 0x21cd, 0x21ce, 0x21cf,
    0x21d0, 0x21d1, 0x21d2, 0x21d3, 0x21d4, 0x21d5, 0x21d6, 0x21d7,
    0x21d8, 0x21d9, 0x21da, 0x21db, 0x21dc, 0x21dd, 0x21de, 0x21df,
    0x21e0, 0x21e1, 0x21e2, 0x21e3, 0x21e4, 0x21e5, 0x21e6, 0x21e7,
    0x21e8, 0x21e9, 0x21ea, 0x21eb, 0x21ec, 0x21ed, 0x21ee, 0x21ef,
    0x21f0, 0x21f1, 0x21f2, 0x21f3, 0x21f4, 0x21f5, 0x21f6, 0x21f7,
    0x21f8, 0x21f9, 0x21fa, 0x21fb, 0x21fc, 0x21fd, 0x21fe, 0x21ff,
};
00258
00259 unsigned int* WGL4NormalizationTables[256] = {
00260 WGL4_Normalization_00,
00261 WGL4_Normalization_01,
00262 WGL4_Normalization_02,
00263 WGL4_Normalization_03,
00264 WGL4_Normalization_04,
00265 NULL, NULL, NULL,
00266 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
00267 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
00268 NULL, NULL, NULL, NULL, NULL, NULL, WGL4_Normalization_1e, NULL,
00269 NULL, WGL4_Normalization_21, NULL, NULL, NULL, NULL, NULL, NULL,
00270 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
00271 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
00272 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
00273 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
00274 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
00275 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
00276 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
00277 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
00278 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
00279 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
00280 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
00281 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
00282 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
00283 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
00284 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
00285 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
00286 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
00287 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
00288 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
00289 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
00290 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
00291 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
00292 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
00293 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
00294 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
00295 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
00296 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
00297 };
00298
00299
00303 bool UTF8Decode(const std::string& str, int pos, wchar_t& ch)
00304 {
00305 unsigned int c0 = (unsigned int) str[pos];
00306 int charlen = UTF8EncodedSizeFromFirstByte(c0);
00307
00308
00309 if (pos + charlen > (int)str.length())
00310 return false;
00311
00312
00313
00314
00315
00316 switch (charlen)
00317 {
00318 case 1:
00319 ch = c0;
00320 return true;
00321
00322 case 2:
00323 ch = ((c0 & 0x1f) << 6) | ((unsigned int) str[pos + 1] & 0x3f);
00324 return true;
00325
00326 case 3:
00327 ch = ((c0 & 0x0f) << 12) |
00328 (((unsigned int) str[pos + 1] & 0x3f) << 6) |
00329 ((unsigned int) str[pos + 2] & 0x3f);
00330 return true;
00331
00332 case 4:
00333 ch = ((c0 & 0x07) << 18) |
00334 (((unsigned int) str[pos + 1] & 0x3f) << 12) |
00335 (((unsigned int) str[pos + 2] & 0x3f) << 6) |
00336 ((unsigned int) str[pos + 3] & 0x3f);
00337 return true;
00338
00339 case 5:
00340 ch = ((c0 & 0x03) << 24) |
00341 (((unsigned int) str[pos + 1] & 0x3f) << 18) |
00342 (((unsigned int) str[pos + 2] & 0x3f) << 12) |
00343 (((unsigned int) str[pos + 3] & 0x3f) << 6) |
00344 ((unsigned int) str[pos + 4] & 0x3f);
00345 return true;
00346
00347 case 6:
00348 ch = ((c0 & 0x01) << 30) |
00349 (((unsigned int) str[pos + 1] & 0x3f) << 24) |
00350 (((unsigned int) str[pos + 2] & 0x3f) << 18) |
00351 (((unsigned int) str[pos + 3] & 0x3f) << 12) |
00352 (((unsigned int) str[pos + 4] & 0x3f) << 6) |
00353 ((unsigned int) str[pos + 5] & 0x3f);
00354 return true;
00355
00356 default:
00357 return false;
00358 }
00359 }
00360
00361
00365 bool UTF8Decode(const char* str, int pos, int length, wchar_t& ch)
00366 {
00367 unsigned int c0 = (unsigned int) str[pos];
00368 int charlen = UTF8EncodedSizeFromFirstByte(c0);
00369
00370
00371 if (pos + charlen > length)
00372 return false;
00373
00374
00375
00376
00377
00378 switch (charlen)
00379 {
00380 case 1:
00381 ch = c0;
00382 return true;
00383
00384 case 2:
00385 ch = ((c0 & 0x1f) << 6) | ((unsigned int) str[pos + 1] & 0x3f);
00386 return true;
00387
00388 case 3:
00389 ch = ((c0 & 0x0f) << 12) |
00390 (((unsigned int) str[pos + 1] & 0x3f) << 6) |
00391 ((unsigned int) str[pos + 2] & 0x3f);
00392 return true;
00393
00394 case 4:
00395 ch = ((c0 & 0x07) << 18) |
00396 (((unsigned int) str[pos + 1] & 0x3f) << 12) |
00397 (((unsigned int) str[pos + 2] & 0x3f) << 6) |
00398 ((unsigned int) str[pos + 3] & 0x3f);
00399 return true;
00400
00401 case 5:
00402 ch = ((c0 & 0x03) << 24) |
00403 (((unsigned int) str[pos + 1] & 0x3f) << 18) |
00404 (((unsigned int) str[pos + 2] & 0x3f) << 12) |
00405 (((unsigned int) str[pos + 3] & 0x3f) << 6) |
00406 ((unsigned int) str[pos + 4] & 0x3f);
00407 return true;
00408
00409 case 6:
00410 ch = ((c0 & 0x01) << 30) |
00411 (((unsigned int) str[pos + 1] & 0x3f) << 24) |
00412 (((unsigned int) str[pos + 2] & 0x3f) << 18) |
00413 (((unsigned int) str[pos + 3] & 0x3f) << 12) |
00414 (((unsigned int) str[pos + 4] & 0x3f) << 6) |
00415 ((unsigned int) str[pos + 5] & 0x3f);
00416 return true;
00417
00418 default:
00419 return false;
00420 }
00421 }
00422
00423
// Encode a single character as a null-terminated UTF-8 sequence.
//
//   ch  character to encode
//   s   output buffer; it must have room for the longest sequence plus the
//       terminator, i.e. at least seven bytes
//
// Returns the number of bytes written, not counting the terminating null
// (1 to 6).
int UTF8Encode(wchar_t ch, char* s)
{
    // Values that compare below 0x80 are stored as a single byte
    if (ch < 0x80)
    {
        s[0] = (char) ch;
        s[1] = '\0';
        return 1;
    }

    // ch >= 0x80 from here on, so the conversion keeps the value
    const unsigned long code = (unsigned long) ch;

    // Choose the sequence length and the marker bits of the lead byte
    int nBytes;
    unsigned long leadMarker;
    if (code < 0x800)
    {
        nBytes = 2;
        leadMarker = 0xc0;
    }
    else if (code < 0x10000)
    {
        nBytes = 3;
        leadMarker = 0xe0;
    }
    else if (code < 0x200000)
    {
        nBytes = 4;
        leadMarker = 0xf0;
    }
    else if (code < 0x4000000)
    {
        nBytes = 5;
        leadMarker = 0xf8;
    }
    else
    {
        nBytes = 6;
        leadMarker = 0xfc;
    }

    // The lead byte holds the top (7 - nBytes) payload bits; every
    // following byte holds the next six bits below them.
    int shift = 6 * (nBytes - 1);
    s[0] = (char) (leadMarker | ((code >> shift) & (0x7fUL >> nBytes)));
    for (int i = 1; i < nBytes; i++)
    {
        shift -= 6;
        s[i] = (char) (0x80 | ((code >> shift) & 0x3f));
    }
    s[nBytes] = '\0';

    return nBytes;
}
00481
00482
// Count the characters in a UTF-8 encoded string.
//
// A byte is counted when it is either ASCII (< 0x80) or a lead byte in the
// range 0xc2..0xf4; all other bytes (continuation bytes and lead bytes
// outside that range) are skipped.  No validation of the sequences is
// performed.
int UTF8Length(const std::string& s)
{
    int nChars = 0;

    for (std::string::const_iterator it = s.begin(); it != s.end(); ++it)
    {
        const unsigned int byte = (unsigned char) *it;
        const bool isAscii = byte < 0x80;
        const bool isLeadByte = byte >= 0xc2 && byte <= 0xf4;
        if (isAscii || isLeadByte)
            ++nChars;
    }

    return nChars;
}
00497
00498
00499 inline wchar_t UTF8Normalize(wchar_t ch)
00500 {
00501 unsigned int page = (unsigned int) ch >> 8;
00502 if (page >= 256)
00503 return ch;
00504
00505 unsigned int* normTable = WGL4NormalizationTables[page];
00506 if (normTable == NULL)
00507 return ch;
00508
00509 return (wchar_t) normTable[(unsigned int) ch & 0xff];
00510 }
00511
00512
00516 int UTF8StringCompare(const std::string& s0, const std::string& s1)
00517 {
00518 int len0 = s0.length();
00519 int len1 = s1.length();
00520 int i0 = 0;
00521 int i1 = 0;
00522 while (i0 < len0 && i1 < len1)
00523 {
00524 wchar_t ch0 = 0;
00525 wchar_t ch1 = 0;
00526 if (!UTF8Decode(s0, i0, ch0))
00527 return 1;
00528 if (!UTF8Decode(s1, i1, ch1))
00529 return -1;
00530
00531 i0 += UTF8EncodedSize(ch0);
00532 i1 += UTF8EncodedSize(ch1);
00533 ch0 = UTF8Normalize(ch0);
00534 ch1 = UTF8Normalize(ch1);
00535
00536 if (ch0 < ch1)
00537 return -1;
00538 else if (ch0 > ch1)
00539 return 1;
00540 }
00541
00542 if (i0 == len0 && i1 == len1)
00543 return 0;
00544
00545 len0 = UTF8Length(s0);
00546 len1 = UTF8Length(s1);
00547 if (len0 > len1)
00548 return 1;
00549 else if (len0 < len1)
00550 return -1;
00551 else
00552 return 0;
00553 }
00554
00555 int UTF8StringCompare(const std::string& s0, const std::string& s1, size_t n)
00556 {
00557 int len0 = s0.length();
00558 int len1 = s1.length();
00559 int i0 = 0;
00560 int i1 = 0;
00561 while (i0 < len0 && i1 < len1 && n > 0)
00562 {
00563 wchar_t ch0 = 0;
00564 wchar_t ch1 = 0;
00565 if (!UTF8Decode(s0, i0, ch0))
00566 return 1;
00567 if (!UTF8Decode(s1, i1, ch1))
00568 return -1;
00569
00570 i0 += UTF8EncodedSize(ch0);
00571 i1 += UTF8EncodedSize(ch1);
00572 ch0 = UTF8Normalize(ch0);
00573 ch1 = UTF8Normalize(ch1);
00574
00575 if (ch0 < ch1)
00576 return -1;
00577 else if (ch0 > ch1)
00578 return 1;
00579
00580 n--;
00581 }
00582
00583 if (n == 0)
00584 return 0;
00585
00586 len0 = UTF8Length(s0);
00587 len1 = UTF8Length(s1);
00588 if (len0 > len1)
00589 return 1;
00590 else if (len0 < len1)
00591 return -1;
00592 else
00593 return 0;
00594 }
00595
00596
// Cursor over a UTF-8 encoded string.  The iterator keeps a reference to
// the string it was created from (the string must outlive the iterator)
// and a byte position that starts at zero.
// NOTE(review): both increment operators are stubs - they return *this
// without changing the position, and the postfix form returns a reference
// rather than a copy.
class UTF8StringIterator
{
public:
    UTF8StringIterator(const std::string& source);
    UTF8StringIterator(const UTF8StringIterator& other);

    UTF8StringIterator& operator++();
    UTF8StringIterator& operator++(int);

private:
    const std::string& str;  // string being traversed (not owned)
    int position;            // current byte offset into str
};


// Create an iterator positioned at the start of source.
UTF8StringIterator::UTF8StringIterator(const std::string& source) :
    str(source), position(0)
{
}


// Create an iterator over the same string and at the same position as other.
UTF8StringIterator::UTF8StringIterator(const UTF8StringIterator& other) :
    str(other.str), position(other.position)
{
}


// Prefix increment; currently leaves the position unchanged.
UTF8StringIterator& UTF8StringIterator::operator++()
{
    return *this;
}


// Postfix increment; currently leaves the position unchanged.
UTF8StringIterator& UTF8StringIterator::operator++(int)
{
    return *this;
}
00638
00639
// English names of the 24 Greek letters in alphabetical order.  The index
// of a letter here matches its index in the other Greek letter tables in
// this file.
static const char *greekAlphabet[] =
{
    "Alpha",   "Beta",    "Gamma",   "Delta",
    "Epsilon", "Zeta",    "Eta",     "Theta",
    "Iota",    "Kappa",   "Lambda",  "Mu",
    "Nu",      "Xi",      "Omicron", "Pi",
    "Rho",     "Sigma",   "Tau",     "Upsilon",
    "Phi",     "Chi",     "Psi",     "Omega"
};
00667
// UTF-8 encodings (as octal escapes) of the lower case Greek letters
// U+03B1..U+03C9, in the same order as the other Greek letter tables in
// this file.  The sequence skips U+03C2 between rho ("\317\201") and sigma
// ("\317\203"), giving 24 entries.
static const char* greekAlphabetUTF8[] =
{
    "\316\261", "\316\262", "\316\263", "\316\264",
    "\316\265", "\316\266", "\316\267", "\316\270",
    "\316\271", "\316\272", "\316\273", "\316\274",
    "\316\275", "\316\276", "\316\277", "\317\200",
    "\317\201", "\317\203", "\317\204", "\317\205",
    "\317\206", "\317\207", "\317\210", "\317\211",
};
00695
// Upper case abbreviations of the Greek letters, two or three characters
// each, in the same order as the other Greek letter tables in this file.
static const char* canonicalAbbrevs[] =
{
    "ALF", "BET", "GAM", "DEL",
    "EPS", "ZET", "ETA", "TET",
    "IOT", "KAP", "LAM", "MU",
    "NU",  "XI",  "OMI", "PI",
    "RHO", "SIG", "TAU", "UPS",
    "PHI", "CHI", "PSI", "OME",
};

// Empty string returned by Greek::canonicalAbbreviation when nothing matches
static std::string noAbbrev("");
00704
00705
00706
00707 Greek* Greek::instance = NULL;
00708
00709 Greek::Greek()
00710 {
00711 nLetters = sizeof(greekAlphabet) / sizeof(greekAlphabet[0]);
00712 names = new std::string[nLetters];
00713 abbrevs = new std::string[nLetters];
00714
00715 for (int i = 0; i < nLetters; i++)
00716 {
00717 names[i] = std::string(greekAlphabet[i]);
00718 abbrevs[i] = std::string(canonicalAbbrevs[i]);
00719 }
00720 }
00721
00722 Greek::~Greek()
00723 {
00724 delete[] names;
00725 delete[] abbrevs;
00726 }
00727
00728 const std::string& Greek::canonicalAbbreviation(const std::string& letter)
00729 {
00730 if (instance == NULL)
00731 instance = new Greek();
00732
00733 int i;
00734 for (i = 0; i < Greek::instance->nLetters; i++)
00735 {
00736 if (compareIgnoringCase(letter, instance->names[i]) == 0)
00737 return instance->abbrevs[i];
00738 }
00739
00740 for (i = 0; i < Greek::instance->nLetters; i++)
00741 {
00742 if (compareIgnoringCase(letter, instance->abbrevs[i]) == 0)
00743 return instance->abbrevs[i];
00744 }
00745
00746 if (letter.length() == 2)
00747 {
00748 for (i = 0; i < Greek::instance->nLetters; i++)
00749 {
00750 if (letter[0] == greekAlphabetUTF8[i][0] &&
00751 letter[1] == greekAlphabetUTF8[i][1])
00752 {
00753 return instance->abbrevs[i];
00754 }
00755 }
00756 }
00757
00758 return noAbbrev;
00759 }
00760
00765 std::string ReplaceGreekLetterAbbr(std::string str)
00766 {
00767 std::string ret = str;
00768
00769 if (str[0] >= 'A' && str[0] <= 'Z' &&
00770 str[1] >= 'A' && str[1] <= 'Z')
00771 {
00772
00773 for (int i = 0; i < Greek::instance->nLetters; i++)
00774 {
00775 const std::string& abbr = Greek::instance->abbrevs[i];
00776 if (str.compare(0, abbr.length(), abbr) == 0)
00777 {
00778 std::string superscript;
00779 if (str.length() > abbr.length())
00780 {
00781 if (str[abbr.length()] == '1')
00782 superscript = UTF8_SUPERSCRIPT_1;
00783 else if (str[abbr.length()] == '2')
00784 superscript = UTF8_SUPERSCRIPT_2;
00785 else if (str[abbr.length()] == '3')
00786 superscript = UTF8_SUPERSCRIPT_3;
00787 }
00788
00789 if (superscript.empty())
00790 {
00791 ret = std::string(greekAlphabetUTF8[i]) + str.substr(abbr.length());
00792 }
00793 else
00794 {
00795 ret = std::string(greekAlphabetUTF8[i]) + superscript +
00796 str.substr(abbr.length() + 1);
00797 }
00798
00799 break;
00800 }
00801 }
00802 }
00803
00804 return ret;
00805 }