// NonASCIIMask<N>::value() yields an N-byte-wide constant in which the top bit
// (0x80) of every byte is set. ANDing it with a word of packed bytes gives a
// non-zero result exactly when at least one of those bytes is >= 0x80.
// Only the 4-byte and 8-byte widths are specialized; any other size is left
// as an incomplete type.
template<size_t size> struct NonASCIIMask;

template<> struct NonASCIIMask<4> {
    // Mask covering four packed bytes.
    static unsigned value()
    {
        const unsigned highBitOfEachByte = 0x80808080U;
        return highBitOfEachByte;
    }
};

template<> struct NonASCIIMask<8> {
    // Mask covering eight packed bytes.
    static unsigned long long value()
    {
        const unsigned long long highBitOfEachByte = 0x8080808080808080ULL;
        return highBitOfEachByte;
    }
};
|---|
| | 127 | |
|---|
| | 128 | template<size_t size> struct UCharByteFiller; |
|---|
| | 129 | template<> struct UCharByteFiller<4> { |
|---|
| | 130 | static void copy(UChar* dest, const unsigned char* src) |
|---|
| | 131 | { |
|---|
| | 132 | dest[0] = src[0]; |
|---|
| | 133 | dest[1] = src[1]; |
|---|
| | 134 | dest[2] = src[2]; |
|---|
| | 135 | dest[3] = src[3]; |
|---|
| | 136 | } |
|---|
| | 137 | }; |
|---|
| | 138 | template<> struct UCharByteFiller<8> { |
|---|
| | 139 | static void copy(UChar* dest, const unsigned char* src) |
|---|
| | 140 | { |
|---|
| | 141 | dest[0] = src[0]; |
|---|
| | 142 | dest[1] = src[1]; |
|---|
| | 143 | dest[2] = src[2]; |
|---|
| | 144 | dest[3] = src[3]; |
|---|
| | 145 | dest[4] = src[4]; |
|---|
| | 146 | dest[5] = src[5]; |
|---|
| | 147 | dest[6] = src[6]; |
|---|
| | 148 | dest[7] = src[7]; |
|---|
| | 149 | } |
|---|
| | 150 | }; |
|---|
| | 151 | |
|---|
| 125 | | // Convert the string a fast way and simultaneously do an efficient check to see if it's all ASCII. |
|---|
| 126 | | unsigned char ored = 0; |
|---|
| 127 | | for (size_t i = 0; i < length; ++i) { |
|---|
| 128 | | unsigned char c = bytes[i]; |
|---|
| 129 | | characters[i] = c; |
|---|
| 130 | | ored |= c; |
|---|
| 131 | | } |
|---|
| 132 | | |
|---|
| 133 | | if (!(ored & 0x80)) |
|---|
| 134 | | return result; |
|---|
| 135 | | |
|---|
| 136 | | // Convert the slightly slower way when there are non-ASCII characters. |
|---|
| 137 | | for (size_t i = 0; i < length; ++i) { |
|---|
| 138 | | unsigned char c = bytes[i]; |
|---|
| 139 | | characters[i] = table[c]; |
|---|
| | 157 | const unsigned char* src = reinterpret_cast<const unsigned char*>(bytes); |
|---|
| | 158 | const unsigned char* end = reinterpret_cast<const unsigned char*>(bytes + length); |
|---|
| | 159 | const unsigned char* alignedEnd = reinterpret_cast<const unsigned char*>(reinterpret_cast<ptrdiff_t>(end) & ~(sizeof(uintptr_t) - 1)); |
|---|
| | 160 | UChar* dest = characters; |
|---|
| | 161 | |
|---|
| | 162 | while (src < end) { |
|---|
| | 163 | if (*src < 0x80) { |
|---|
| | 164 | // Fast path for values < 0x80 (most Latin-1 text will be ASCII) |
|---|
| | 165 | // Wait until we're at a properly aligned address, then read full CPU words. |
|---|
| | 166 | if (!(reinterpret_cast<ptrdiff_t>(src) & (sizeof(uintptr_t) - 1))) { |
|---|
| | 167 | while (src < alignedEnd) { |
|---|
| | 168 | uintptr_t chunk = *reinterpret_cast<const uintptr_t*>(src); |
|---|
| | 169 | |
|---|
| | 170 | if (chunk & NonASCIIMask<sizeof(uintptr_t)>::value()) |
|---|
| | 171 | goto useLookupTable; |
|---|
| | 172 | |
|---|
| | 173 | UCharByteFiller<sizeof(uintptr_t)>::copy(dest, src); |
|---|
| | 174 | |
|---|
| | 175 | src += sizeof(uintptr_t); |
|---|
| | 176 | dest += sizeof(uintptr_t); |
|---|
| | 177 | } |
|---|
| | 178 | |
|---|
| | 179 | if (src == end) |
|---|
| | 180 | break; |
|---|
| | 181 | } |
|---|
| | 182 | *dest = *src; |
|---|
| | 183 | } else { |
|---|
| | 184 | useLookupTable: |
|---|
| | 185 | *dest = table[*src]; |
|---|
| | 186 | } |
|---|
| | 187 | |
|---|
| | 188 | ++src; |
|---|
| | 189 | ++dest; |
|---|