00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #ifndef _CELUTIL_UTF8_
00011 #define _CELUTIL_UTF8_
00012
00013 #include <string>
00014
// String literals holding the UTF-8 encoding of a few non-ASCII symbols.
// Each is written as two octal-escaped bytes so the header itself stays
// pure ASCII:
//   \302\260 = C2 B0 -> U+00B0 (degree sign)
//   \303\227 = C3 97 -> U+00D7 (multiplication sign)
//   \302\271 = C2 B9 -> U+00B9 (superscript one)
//   \302\262 = C2 B2 -> U+00B2 (superscript two)
//   \302\263 = C2 B3 -> U+00B3 (superscript three)
00015 #define UTF8_DEGREE_SIGN "\302\260"
00016 #define UTF8_MULTIPLICATION_SIGN "\303\227"
00017 #define UTF8_SUPERSCRIPT_1 "\302\271"
00018 #define UTF8_SUPERSCRIPT_2 "\302\262"
00019 #define UTF8_SUPERSCRIPT_3 "\302\263"
00020
00021
// Prototypes for the UTF-8 helper routines. Their bodies are not part of
// this header, so the notes below are read off the signatures only and are
// marked as assumptions wherever they go beyond that.

// Presumably decodes the UTF-8 sequence that starts at byte offset `pos` of
// `str` into `ch` and reports success through the bool result -- TODO confirm
// against the implementation.
00022 bool UTF8Decode(const std::string& str, int pos, wchar_t& ch);
// Raw-buffer overload: same parameters plus an explicit `length` for `str`.
// NOTE(review): whether `length` is the total buffer size or the number of
// bytes remaining after `pos` cannot be told from here -- verify.
00023 bool UTF8Decode(const char* str, int pos, int length, wchar_t& ch);
// Presumably writes the UTF-8 encoding of `ch` into `s` and returns the number
// of bytes written. The required capacity of `s` is not stated here;
// UTF8EncodedSize() below reports sizes of up to 6 -- TODO confirm.
00024 int UTF8Encode(wchar_t ch, char* s);
// Compares two strings and returns an int; presumably a negative/zero/positive
// ordering result -- confirm the exact comparison rules in the implementation.
00025 int UTF8StringCompare(const std::string& s0, const std::string& s1);
// Overload taking an additional `length` limit.
// NOTE(review): unclear from the signature whether `length` counts bytes or
// decoded characters -- verify.
00026 int UTF8StringCompare(const std::string& s0, const std::string& s1, size_t length);
// Presumably returns the number of characters (not bytes) encoded in `s` --
// TODO confirm.
00027 int UTF8Length(const std::string& s);
00028
// Returns how many bytes are needed to encode the character value `ch`.
// The result ranges from 1 (values below 0x80) up to 6 (values of 0x4000000
// and above), following the size thresholds tabulated below.
inline int UTF8EncodedSize(wchar_t ch)
{
    // Exclusive upper bounds: a value below sizeLimits[i] fits in i + 1 bytes.
    const int sizeLimits[] = { 0x80, 0x800, 0x10000, 0x200000, 0x4000000 };
    const int limitCount = sizeof(sizeLimits) / sizeof(sizeLimits[0]);

    for (int i = 0; i < limitCount; ++i)
    {
        if (ch < sizeLimits[i])
            return i + 1;
    }

    // At or beyond the last bound: the longest form.
    return limitCount + 1;
}
00044
// Returns the length (1 to 6) of an encoded sequence, judged from its first
// byte `ch`. Values below 0x80 yield 1. Otherwise the high bits of the low
// byte are matched against the lead-byte patterns below; a value that matches
// none of them (for example 0x80..0xbf, 0xfe or 0xff) also yields 1.
inline int UTF8EncodedSizeFromFirstByte(unsigned int ch)
{
    if (ch < 0x80)
        return 1;

    // leadMasks[i] selects the marker bits and leadPatterns[i] is the value
    // they must have for a sequence of i + 2 bytes.
    const unsigned int leadMasks[]    = { 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
    const unsigned int leadPatterns[] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc };
    const int patternCount = sizeof(leadMasks) / sizeof(leadMasks[0]);

    for (int i = 0; i < patternCount; ++i)
    {
        if ((ch & leadMasks[i]) == leadPatterns[i])
            return i + 2;
    }

    // Not a recognized lead byte: fall back to a length of one.
    return 1;
}
00064
// Takes a string by value and returns a string by value. Presumably the result
// is the input with Greek-letter abbreviations replaced -- the body is not in
// this header, so confirm what the replacement text is against the
// implementation.
00065 std::string ReplaceGreekLetterAbbr(std::string);
00066
// Greek alphabet support: an enumeration of the 24 letters plus per-object
// name/abbreviation tables.
//
// The constructor and destructor are private, so code outside the class
// cannot create or destroy a Greek object itself; the static `instance`
// pointer is the only handle to an object that this declaration exposes.
class Greek
{
    Greek();
    ~Greek();

public:
    // Letters are numbered consecutively, Alpha = 1 through Omega = 24.
    enum Letter
    {
        Alpha = 1,
        Beta,
        Gamma,
        Delta,
        Epsilon,
        Zeta,
        Eta,
        Theta,
        Iota,
        Kappa,
        Lambda,
        Mu,
        Nu,
        Xi,
        Omicron,
        Pi,
        Rho,
        Sigma,
        Tau,
        Upsilon,
        Phi,
        Chi,
        Psi,
        Omega
    };

    // Maps a string to a reference to a string; presumably the canonical
    // abbreviation for the given letter name -- confirm in the implementation.
    static const std::string& canonicalAbbreviation(const std::string&);

    // Pointer to a Greek object, defined outside this header.
    static Greek* instance;

    // NOTE(review): `names` and `abbrevs` look like arrays with `nLetters`
    // entries each -- ownership and sizes are not visible here; verify.
    int nLetters;
    std::string* names;
    std::string* abbrevs;
};
00109
00110 #endif // _CELUTIL_UTF8_