1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348
// Copyright 2015 Google Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

//! CommonMark punctuation set based on spec and Unicode properties.

// Autogenerated by mk_puncttable.py

/// Punctuation bitmasks for the ASCII range, one `u16` per block of 16 code
/// points. Bit `i` of entry `n` is set when code point `n * 16 + i` is
/// punctuation.
const PUNCT_MASKS_ASCII: [u16; 8] = [
    0x0000, // U+0000...U+000F
    0x0000, // U+0010...U+001F
    0xfffe, // U+0020...U+002F
    0xfc00, // U+0030...U+003F
    0x0001, // U+0040...U+004F
    0xf800, // U+0050...U+005F
    0x0001, // U+0060...U+006F
    0x7800, // U+0070...U+007F
];

/// Block indices (`code point / 16`) of every non-ASCII 16-code-point block
/// that contains at least one punctuation character.
///
/// The entries are strictly increasing, which is what allows the binary search
/// in [`is_punctuation`]. Entry `i` is paired with `PUNCT_MASKS[i]`.
const PUNCT_TAB: [u16; 132] = [
    10,   // U+00A0...U+00AF
    11,   // U+00B0...U+00BF
    55,   // U+0370...U+037F
    56,   // U+0380...U+038F
    85,   // U+0550...U+055F
    88,   // U+0580...U+058F
    91,   // U+05B0...U+05BF
    92,   // U+05C0...U+05CF
    95,   // U+05F0...U+05FF
    96,   // U+0600...U+060F
    97,   // U+0610...U+061F
    102,  // U+0660...U+066F
    109,  // U+06D0...U+06DF
    112,  // U+0700...U+070F
    127,  // U+07F0...U+07FF
    131,  // U+0830...U+083F
    133,  // U+0850...U+085F
    150,  // U+0960...U+096F
    151,  // U+0970...U+097F
    175,  // U+0AF0...U+0AFF
    223,  // U+0DF0...U+0DFF
    228,  // U+0E40...U+0E4F
    229,  // U+0E50...U+0E5F
    240,  // U+0F00...U+0F0F
    241,  // U+0F10...U+0F1F
    243,  // U+0F30...U+0F3F
    248,  // U+0F80...U+0F8F
    253,  // U+0FD0...U+0FDF
    260,  // U+1040...U+104F
    271,  // U+10F0...U+10FF
    310,  // U+1360...U+136F
    320,  // U+1400...U+140F
    358,  // U+1660...U+166F
    361,  // U+1690...U+169F
    366,  // U+16E0...U+16EF
    371,  // U+1730...U+173F
    381,  // U+17D0...U+17DF
    384,  // U+1800...U+180F
    404,  // U+1940...U+194F
    417,  // U+1A10...U+1A1F
    426,  // U+1AA0...U+1AAF
    437,  // U+1B50...U+1B5F
    438,  // U+1B60...U+1B6F
    447,  // U+1BF0...U+1BFF
    451,  // U+1C30...U+1C3F
    455,  // U+1C70...U+1C7F
    460,  // U+1CC0...U+1CCF
    461,  // U+1CD0...U+1CDF
    513,  // U+2010...U+201F
    514,  // U+2020...U+202F
    515,  // U+2030...U+203F
    516,  // U+2040...U+204F
    517,  // U+2050...U+205F
    519,  // U+2070...U+207F
    520,  // U+2080...U+208F
    560,  // U+2300...U+230F
    562,  // U+2320...U+232F
    630,  // U+2760...U+276F
    631,  // U+2770...U+277F
    636,  // U+27C0...U+27CF
    638,  // U+27E0...U+27EF
    664,  // U+2980...U+298F
    665,  // U+2990...U+299F
    669,  // U+29D0...U+29DF
    671,  // U+29F0...U+29FF
    719,  // U+2CF0...U+2CFF
    727,  // U+2D70...U+2D7F
    736,  // U+2E00...U+2E0F
    737,  // U+2E10...U+2E1F
    738,  // U+2E20...U+2E2F
    739,  // U+2E30...U+2E3F
    740,  // U+2E40...U+2E4F
    768,  // U+3000...U+300F
    769,  // U+3010...U+301F
    771,  // U+3030...U+303F
    778,  // U+30A0...U+30AF
    783,  // U+30F0...U+30FF
    2639, // U+A4F0...U+A4FF
    2656, // U+A600...U+A60F
    2663, // U+A670...U+A67F
    2671, // U+A6F0...U+A6FF
    2695, // U+A870...U+A87F
    2700, // U+A8C0...U+A8CF
    2703, // U+A8F0...U+A8FF
    2706, // U+A920...U+A92F
    2709, // U+A950...U+A95F
    2716, // U+A9C0...U+A9CF
    2717, // U+A9D0...U+A9DF
    2725, // U+AA50...U+AA5F
    2733, // U+AAD0...U+AADF
    2735, // U+AAF0...U+AAFF
    2750, // U+ABE0...U+ABEF
    4051, // U+FD30...U+FD3F
    4065, // U+FE10...U+FE1F
    4067, // U+FE30...U+FE3F
    4068, // U+FE40...U+FE4F
    4069, // U+FE50...U+FE5F
    4070, // U+FE60...U+FE6F
    4080, // U+FF00...U+FF0F
    4081, // U+FF10...U+FF1F
    4082, // U+FF20...U+FF2F
    4083, // U+FF30...U+FF3F
    4085, // U+FF50...U+FF5F
    4086, // U+FF60...U+FF6F
    4112, // U+10100...U+1010F
    4153, // U+10390...U+1039F
    4157, // U+103D0...U+103DF
    4182, // U+10560...U+1056F
    4229, // U+10850...U+1085F
    4241, // U+10910...U+1091F
    4243, // U+10930...U+1093F
    4261, // U+10A50...U+10A5F
    4263, // U+10A70...U+10A7F
    4271, // U+10AF0...U+10AFF
    4275, // U+10B30...U+10B3F
    4281, // U+10B90...U+10B9F
    4356, // U+11040...U+1104F
    4363, // U+110B0...U+110BF
    4364, // U+110C0...U+110CF
    4372, // U+11140...U+1114F
    4375, // U+11170...U+1117F
    4380, // U+111C0...U+111CF
    4387, // U+11230...U+1123F
    4428, // U+114C0...U+114CF
    4444, // U+115C0...U+115CF
    4452, // U+11640...U+1164F
    4679, // U+12470...U+1247F
    5798, // U+16A60...U+16A6F
    5807, // U+16AF0...U+16AFF
    5811, // U+16B30...U+16B3F
    5812, // U+16B40...U+16B4F
    7113, // U+1BC90...U+1BC9F
];

/// Punctuation bitmasks for the blocks listed in `PUNCT_TAB`, in the same
/// order. Bit `i` of entry `n` is set when code point
/// `PUNCT_TAB[n] * 16 + i` is punctuation.
const PUNCT_MASKS: [u16; 132] = [
    0x0882, // U+00A0...U+00AF
    0x88c0, // U+00B0...U+00BF
    0x4000, // U+0370...U+037F
    0x0080, // U+0380...U+038F
    0xfc00, // U+0550...U+055F
    0x0600, // U+0580...U+058F
    0x4000, // U+05B0...U+05BF
    0x0049, // U+05C0...U+05CF
    0x0018, // U+05F0...U+05FF
    0x3600, // U+0600...U+060F
    0xc800, // U+0610...U+061F
    0x3c00, // U+0660...U+066F
    0x0010, // U+06D0...U+06DF
    0x3fff, // U+0700...U+070F
    0x0380, // U+07F0...U+07FF
    0x7fff, // U+0830...U+083F
    0x4000, // U+0850...U+085F
    0x0030, // U+0960...U+096F
    0x0001, // U+0970...U+097F
    0x0001, // U+0AF0...U+0AFF
    0x0010, // U+0DF0...U+0DFF
    0x8000, // U+0E40...U+0E4F
    0x0c00, // U+0E50...U+0E5F
    0xfff0, // U+0F00...U+0F0F
    0x0017, // U+0F10...U+0F1F
    0x3c00, // U+0F30...U+0F3F
    0x0020, // U+0F80...U+0F8F
    0x061f, // U+0FD0...U+0FDF
    0xfc00, // U+1040...U+104F
    0x0800, // U+10F0...U+10FF
    0x01ff, // U+1360...U+136F
    0x0001, // U+1400...U+140F
    0x6000, // U+1660...U+166F
    0x1800, // U+1690...U+169F
    0x3800, // U+16E0...U+16EF
    0x0060, // U+1730...U+173F
    0x0770, // U+17D0...U+17DF
    0x07ff, // U+1800...U+180F
    0x0030, // U+1940...U+194F
    0xc000, // U+1A10...U+1A1F
    0x3f7f, // U+1AA0...U+1AAF
    0xfc00, // U+1B50...U+1B5F
    0x0001, // U+1B60...U+1B6F
    0xf000, // U+1BF0...U+1BFF
    0xf800, // U+1C30...U+1C3F
    0xc000, // U+1C70...U+1C7F
    0x00ff, // U+1CC0...U+1CCF
    0x0008, // U+1CD0...U+1CDF
    0xffff, // U+2010...U+201F
    0x00ff, // U+2020...U+202F
    0xffff, // U+2030...U+203F
    0xffef, // U+2040...U+204F
    0x7ffb, // U+2050...U+205F
    0x6000, // U+2070...U+207F
    0x6000, // U+2080...U+208F
    0x0f00, // U+2300...U+230F
    0x0600, // U+2320...U+232F
    0xff00, // U+2760...U+276F
    0x003f, // U+2770...U+277F
    0x0060, // U+27C0...U+27CF
    0xffc0, // U+27E0...U+27EF
    0xfff8, // U+2980...U+298F
    0x01ff, // U+2990...U+299F
    0x0f00, // U+29D0...U+29DF
    0x3000, // U+29F0...U+29FF
    0xde00, // U+2CF0...U+2CFF
    0x0001, // U+2D70...U+2D7F
    0xffff, // U+2E00...U+2E0F
    0xffff, // U+2E10...U+2E1F
    0x7fff, // U+2E20...U+2E2F
    0xffff, // U+2E30...U+2E3F
    0x0007, // U+2E40...U+2E4F
    0xff0e, // U+3000...U+300F
    0xfff3, // U+3010...U+301F
    0x2001, // U+3030...U+303F
    0x0001, // U+30A0...U+30AF
    0x0800, // U+30F0...U+30FF
    0xc000, // U+A4F0...U+A4FF
    0xe000, // U+A600...U+A60F
    0x4008, // U+A670...U+A67F
    0x00fc, // U+A6F0...U+A6FF
    0x00f0, // U+A870...U+A87F
    0xc000, // U+A8C0...U+A8CF
    0x0700, // U+A8F0...U+A8FF
    0xc000, // U+A920...U+A92F
    0x8000, // U+A950...U+A95F
    0x3ffe, // U+A9C0...U+A9CF
    0xc000, // U+A9D0...U+A9DF
    0xf000, // U+AA50...U+AA5F
    0xc000, // U+AAD0...U+AADF
    0x0003, // U+AAF0...U+AAFF
    0x0800, // U+ABE0...U+ABEF
    0xc000, // U+FD30...U+FD3F
    0x03ff, // U+FE10...U+FE1F
    0xffff, // U+FE30...U+FE3F
    0xffff, // U+FE40...U+FE4F
    0xfff7, // U+FE50...U+FE5F
    0x0d0b, // U+FE60...U+FE6F
    0xf7ee, // U+FF00...U+FF0F
    0x8c00, // U+FF10...U+FF1F
    0x0001, // U+FF20...U+FF2F
    0xb800, // U+FF30...U+FF3F
    0xa800, // U+FF50...U+FF5F
    0x003f, // U+FF60...U+FF6F
    0x0007, // U+10100...U+1010F
    0x8000, // U+10390...U+1039F
    0x0001, // U+103D0...U+103DF
    0x8000, // U+10560...U+1056F
    0x0080, // U+10850...U+1085F
    0x8000, // U+10910...U+1091F
    0x8000, // U+10930...U+1093F
    0x01ff, // U+10A50...U+10A5F
    0x8000, // U+10A70...U+10A7F
    0x007f, // U+10AF0...U+10AFF
    0xfe00, // U+10B30...U+10B3F
    0x1e00, // U+10B90...U+10B9F
    0x3f80, // U+11040...U+1104F
    0xd800, // U+110B0...U+110BF
    0x0003, // U+110C0...U+110CF
    0x000f, // U+11140...U+1114F
    0x0030, // U+11170...U+1117F
    0x21e0, // U+111C0...U+111CF
    0x3f00, // U+11230...U+1123F
    0x0040, // U+114C0...U+114CF
    0x03fe, // U+115C0...U+115CF
    0x000e, // U+11640...U+1164F
    0x001f, // U+12470...U+1247F
    0xc000, // U+16A60...U+16A6F
    0x0020, // U+16AF0...U+16AFF
    0x0f80, // U+16B30...U+16B3F
    0x0010, // U+16B40...U+16B4F
    0x8000, // U+1BC90...U+1BC9F
];

/// Returns `true` if the byte `c` is an ASCII punctuation character according
/// to the `PUNCT_MASKS_ASCII` table.
///
/// Bytes `>= 128` are never ASCII punctuation and yield `false`.
pub fn is_ascii_punctuation(c: u8) -> bool {
    let bit = 1u16 << (c & 0x0f);
    // The table has one entry per 16-byte block of the ASCII range, so `get`
    // returns `None` (and we answer `false`) for any byte >= 128.
    PUNCT_MASKS_ASCII
        .get(usize::from(c >> 4))
        .map_or(false, |&mask| mask & bit != 0)
}

/// Returns `true` if `c` is a punctuation character according to the
/// punctuation tables in this module.
///
/// ASCII characters are answered by [`is_ascii_punctuation`]; all other
/// characters are looked up in `PUNCT_TAB` / `PUNCT_MASKS`. Characters whose
/// 16-code-point block is not listed in `PUNCT_TAB` are not punctuation.
pub fn is_punctuation(c: char) -> bool {
    if c.is_ascii() {
        return is_ascii_punctuation(c as u8);
    }

    let cp = c as u32;
    let block = cp >> 4;
    let bit = 1u16 << (cp & 0x0f);

    // Compare in `u32` rather than narrowing `block` to `u16`: block indices
    // of high code points (up to 0x10FFF) do not fit in `u16`, and a truncated
    // value could alias a real table entry. Widening the table side keeps the
    // lookup exact without a hand-maintained upper bound on the table.
    match PUNCT_TAB.binary_search_by(|&key| u32::from(key).cmp(&block)) {
        Ok(index) => PUNCT_MASKS[index] & bit != 0,
        Err(_) => false,
    }
}

#[cfg(test)]
mod tests {
    use super::{is_ascii_punctuation, is_punctuation, PUNCT_TAB};

    #[test]
    fn test_ascii() {
        assert!(is_ascii_punctuation(b'!'));
        assert!(is_ascii_punctuation(b'@'));
        assert!(is_ascii_punctuation(b'~'));
        assert!(!is_ascii_punctuation(b' '));
        assert!(!is_ascii_punctuation(b'0'));
        assert!(!is_ascii_punctuation(b'A'));
        assert!(!is_ascii_punctuation(0xA1));
    }

    #[test]
    fn test_unicode() {
        assert!(is_punctuation('~'));
        assert!(!is_punctuation(' '));

        assert!(is_punctuation('\u{00A1}'));
        assert!(is_punctuation('\u{060C}'));
        assert!(is_punctuation('\u{FF65}'));
        assert!(is_punctuation('\u{1BC9F}'));
        assert!(!is_punctuation('\u{1BCA0}'));
    }

    #[test]
    fn test_above_table() {
        // U+1000A1 has block index 0x1000A, whose low 16 bits (0x000A) equal
        // the table entry for U+00A0...U+00AF; it must not alias that entry.
        assert!(!is_punctuation('\u{1000A1}'));
        assert!(!is_punctuation('\u{1F600}'));
        assert!(!is_punctuation('\u{10FFFF}'));
    }

    #[test]
    fn test_table_sorted() {
        // Binary search is only meaningful on a strictly increasing table.
        assert!(PUNCT_TAB.windows(2).all(|pair| pair[0] < pair[1]));
    }
}