1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
// Copyright 2015 Google Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

//! CommonMark punctuation set based on spec and Unicode properties.

// Autogenerated by mk_puncttable.py

/// Punctuation bitmaps for the ASCII range, one `u16` per run of 16 code
/// points. Bit `n` of entry `r` is set when code point `r * 16 + n` counts as
/// punctuation.
const PUNCT_MASKS_ASCII: [u16; 8] = [
    0x0000, // U+0000...U+000F: none
    0x0000, // U+0010...U+001F: none
    0xfffe, // U+0020...U+002F: all except the space at U+0020
    0xfc00, // U+0030...U+003F: U+003A...U+003F  (: ; < = > ?)
    0x0001, // U+0040...U+004F: U+0040 only  (@)
    0xf800, // U+0050...U+005F: U+005B...U+005F  ([ \ ] ^ _)
    0x0001, // U+0060...U+006F: U+0060 only  (backtick)
    0x7800, // U+0070...U+007F: U+007B...U+007E  ({ | } ~)
];

/// Row keys for the non-ASCII punctuation lookup.
///
/// Each entry is a code point divided by 16, i.e. it names one run of 16
/// consecutive code points (the trailing comment on every line spells out the
/// run). Entries are in ascending order because `is_punctuation` locates a row
/// with a binary search; the index found there selects the matching bitmap in
/// `PUNCT_MASKS`, so the two tables must keep the same length and ordering.
const PUNCT_TAB: [u16; 132] = [
        10,  // U+00A0...U+00AF
        11,  // U+00B0...U+00BF
        55,  // U+0370...U+037F
        56,  // U+0380...U+038F
        85,  // U+0550...U+055F
        88,  // U+0580...U+058F
        91,  // U+05B0...U+05BF
        92,  // U+05C0...U+05CF
        95,  // U+05F0...U+05FF
        96,  // U+0600...U+060F
        97,  // U+0610...U+061F
        102,  // U+0660...U+066F
        109,  // U+06D0...U+06DF
        112,  // U+0700...U+070F
        127,  // U+07F0...U+07FF
        131,  // U+0830...U+083F
        133,  // U+0850...U+085F
        150,  // U+0960...U+096F
        151,  // U+0970...U+097F
        175,  // U+0AF0...U+0AFF
        223,  // U+0DF0...U+0DFF
        228,  // U+0E40...U+0E4F
        229,  // U+0E50...U+0E5F
        240,  // U+0F00...U+0F0F
        241,  // U+0F10...U+0F1F
        243,  // U+0F30...U+0F3F
        248,  // U+0F80...U+0F8F
        253,  // U+0FD0...U+0FDF
        260,  // U+1040...U+104F
        271,  // U+10F0...U+10FF
        310,  // U+1360...U+136F
        320,  // U+1400...U+140F
        358,  // U+1660...U+166F
        361,  // U+1690...U+169F
        366,  // U+16E0...U+16EF
        371,  // U+1730...U+173F
        381,  // U+17D0...U+17DF
        384,  // U+1800...U+180F
        404,  // U+1940...U+194F
        417,  // U+1A10...U+1A1F
        426,  // U+1AA0...U+1AAF
        437,  // U+1B50...U+1B5F
        438,  // U+1B60...U+1B6F
        447,  // U+1BF0...U+1BFF
        451,  // U+1C30...U+1C3F
        455,  // U+1C70...U+1C7F
        460,  // U+1CC0...U+1CCF
        461,  // U+1CD0...U+1CDF
        513,  // U+2010...U+201F
        514,  // U+2020...U+202F
        515,  // U+2030...U+203F
        516,  // U+2040...U+204F
        517,  // U+2050...U+205F
        519,  // U+2070...U+207F
        520,  // U+2080...U+208F
        560,  // U+2300...U+230F
        562,  // U+2320...U+232F
        630,  // U+2760...U+276F
        631,  // U+2770...U+277F
        636,  // U+27C0...U+27CF
        638,  // U+27E0...U+27EF
        664,  // U+2980...U+298F
        665,  // U+2990...U+299F
        669,  // U+29D0...U+29DF
        671,  // U+29F0...U+29FF
        719,  // U+2CF0...U+2CFF
        727,  // U+2D70...U+2D7F
        736,  // U+2E00...U+2E0F
        737,  // U+2E10...U+2E1F
        738,  // U+2E20...U+2E2F
        739,  // U+2E30...U+2E3F
        740,  // U+2E40...U+2E4F
        768,  // U+3000...U+300F
        769,  // U+3010...U+301F
        771,  // U+3030...U+303F
        778,  // U+30A0...U+30AF
        783,  // U+30F0...U+30FF
        2639,  // U+A4F0...U+A4FF
        2656,  // U+A600...U+A60F
        2663,  // U+A670...U+A67F
        2671,  // U+A6F0...U+A6FF
        2695,  // U+A870...U+A87F
        2700,  // U+A8C0...U+A8CF
        2703,  // U+A8F0...U+A8FF
        2706,  // U+A920...U+A92F
        2709,  // U+A950...U+A95F
        2716,  // U+A9C0...U+A9CF
        2717,  // U+A9D0...U+A9DF
        2725,  // U+AA50...U+AA5F
        2733,  // U+AAD0...U+AADF
        2735,  // U+AAF0...U+AAFF
        2750,  // U+ABE0...U+ABEF
        4051,  // U+FD30...U+FD3F
        4065,  // U+FE10...U+FE1F
        4067,  // U+FE30...U+FE3F
        4068,  // U+FE40...U+FE4F
        4069,  // U+FE50...U+FE5F
        4070,  // U+FE60...U+FE6F
        4080,  // U+FF00...U+FF0F
        4081,  // U+FF10...U+FF1F
        4082,  // U+FF20...U+FF2F
        4083,  // U+FF30...U+FF3F
        4085,  // U+FF50...U+FF5F
        4086,  // U+FF60...U+FF6F
        4112,  // U+10100...U+1010F
        4153,  // U+10390...U+1039F
        4157,  // U+103D0...U+103DF
        4182,  // U+10560...U+1056F
        4229,  // U+10850...U+1085F
        4241,  // U+10910...U+1091F
        4243,  // U+10930...U+1093F
        4261,  // U+10A50...U+10A5F
        4263,  // U+10A70...U+10A7F
        4271,  // U+10AF0...U+10AFF
        4275,  // U+10B30...U+10B3F
        4281,  // U+10B90...U+10B9F
        4356,  // U+11040...U+1104F
        4363,  // U+110B0...U+110BF
        4364,  // U+110C0...U+110CF
        4372,  // U+11140...U+1114F
        4375,  // U+11170...U+1117F
        4380,  // U+111C0...U+111CF
        4387,  // U+11230...U+1123F
        4428,  // U+114C0...U+114CF
        4444,  // U+115C0...U+115CF
        4452,  // U+11640...U+1164F
        4679,  // U+12470...U+1247F
        5798,  // U+16A60...U+16A6F
        5807,  // U+16AF0...U+16AFF
        5811,  // U+16B30...U+16B3F
        5812,  // U+16B40...U+16B4F
        7113,  // U+1BC90...U+1BC9F
    ];

/// Punctuation bitmaps for the non-ASCII rows listed in `PUNCT_TAB`.
///
/// Entry `i` describes the run of 16 code points whose row key is
/// `PUNCT_TAB[i]` (the trailing comment on every line spells out the run).
/// Bit `n` is set when the code point at offset `n` within that run is
/// punctuation; `is_punctuation` tests it with `1 << (cp & 15)`.
const PUNCT_MASKS: [u16; 132] = [
        0x0882,  // U+00A0...U+00AF
        0x88c0,  // U+00B0...U+00BF
        0x4000,  // U+0370...U+037F
        0x0080,  // U+0380...U+038F
        0xfc00,  // U+0550...U+055F
        0x0600,  // U+0580...U+058F
        0x4000,  // U+05B0...U+05BF
        0x0049,  // U+05C0...U+05CF
        0x0018,  // U+05F0...U+05FF
        0x3600,  // U+0600...U+060F
        0xc800,  // U+0610...U+061F
        0x3c00,  // U+0660...U+066F
        0x0010,  // U+06D0...U+06DF
        0x3fff,  // U+0700...U+070F
        0x0380,  // U+07F0...U+07FF
        0x7fff,  // U+0830...U+083F
        0x4000,  // U+0850...U+085F
        0x0030,  // U+0960...U+096F
        0x0001,  // U+0970...U+097F
        0x0001,  // U+0AF0...U+0AFF
        0x0010,  // U+0DF0...U+0DFF
        0x8000,  // U+0E40...U+0E4F
        0x0c00,  // U+0E50...U+0E5F
        0xfff0,  // U+0F00...U+0F0F
        0x0017,  // U+0F10...U+0F1F
        0x3c00,  // U+0F30...U+0F3F
        0x0020,  // U+0F80...U+0F8F
        0x061f,  // U+0FD0...U+0FDF
        0xfc00,  // U+1040...U+104F
        0x0800,  // U+10F0...U+10FF
        0x01ff,  // U+1360...U+136F
        0x0001,  // U+1400...U+140F
        0x6000,  // U+1660...U+166F
        0x1800,  // U+1690...U+169F
        0x3800,  // U+16E0...U+16EF
        0x0060,  // U+1730...U+173F
        0x0770,  // U+17D0...U+17DF
        0x07ff,  // U+1800...U+180F
        0x0030,  // U+1940...U+194F
        0xc000,  // U+1A10...U+1A1F
        0x3f7f,  // U+1AA0...U+1AAF
        0xfc00,  // U+1B50...U+1B5F
        0x0001,  // U+1B60...U+1B6F
        0xf000,  // U+1BF0...U+1BFF
        0xf800,  // U+1C30...U+1C3F
        0xc000,  // U+1C70...U+1C7F
        0x00ff,  // U+1CC0...U+1CCF
        0x0008,  // U+1CD0...U+1CDF
        0xffff,  // U+2010...U+201F
        0x00ff,  // U+2020...U+202F
        0xffff,  // U+2030...U+203F
        0xffef,  // U+2040...U+204F
        0x7ffb,  // U+2050...U+205F
        0x6000,  // U+2070...U+207F
        0x6000,  // U+2080...U+208F
        0x0f00,  // U+2300...U+230F
        0x0600,  // U+2320...U+232F
        0xff00,  // U+2760...U+276F
        0x003f,  // U+2770...U+277F
        0x0060,  // U+27C0...U+27CF
        0xffc0,  // U+27E0...U+27EF
        0xfff8,  // U+2980...U+298F
        0x01ff,  // U+2990...U+299F
        0x0f00,  // U+29D0...U+29DF
        0x3000,  // U+29F0...U+29FF
        0xde00,  // U+2CF0...U+2CFF
        0x0001,  // U+2D70...U+2D7F
        0xffff,  // U+2E00...U+2E0F
        0xffff,  // U+2E10...U+2E1F
        0x7fff,  // U+2E20...U+2E2F
        0xffff,  // U+2E30...U+2E3F
        0x0007,  // U+2E40...U+2E4F
        0xff0e,  // U+3000...U+300F
        0xfff3,  // U+3010...U+301F
        0x2001,  // U+3030...U+303F
        0x0001,  // U+30A0...U+30AF
        0x0800,  // U+30F0...U+30FF
        0xc000,  // U+A4F0...U+A4FF
        0xe000,  // U+A600...U+A60F
        0x4008,  // U+A670...U+A67F
        0x00fc,  // U+A6F0...U+A6FF
        0x00f0,  // U+A870...U+A87F
        0xc000,  // U+A8C0...U+A8CF
        0x0700,  // U+A8F0...U+A8FF
        0xc000,  // U+A920...U+A92F
        0x8000,  // U+A950...U+A95F
        0x3ffe,  // U+A9C0...U+A9CF
        0xc000,  // U+A9D0...U+A9DF
        0xf000,  // U+AA50...U+AA5F
        0xc000,  // U+AAD0...U+AADF
        0x0003,  // U+AAF0...U+AAFF
        0x0800,  // U+ABE0...U+ABEF
        0xc000,  // U+FD30...U+FD3F
        0x03ff,  // U+FE10...U+FE1F
        0xffff,  // U+FE30...U+FE3F
        0xffff,  // U+FE40...U+FE4F
        0xfff7,  // U+FE50...U+FE5F
        0x0d0b,  // U+FE60...U+FE6F
        0xf7ee,  // U+FF00...U+FF0F
        0x8c00,  // U+FF10...U+FF1F
        0x0001,  // U+FF20...U+FF2F
        0xb800,  // U+FF30...U+FF3F
        0xa800,  // U+FF50...U+FF5F
        0x003f,  // U+FF60...U+FF6F
        0x0007,  // U+10100...U+1010F
        0x8000,  // U+10390...U+1039F
        0x0001,  // U+103D0...U+103DF
        0x8000,  // U+10560...U+1056F
        0x0080,  // U+10850...U+1085F
        0x8000,  // U+10910...U+1091F
        0x8000,  // U+10930...U+1093F
        0x01ff,  // U+10A50...U+10A5F
        0x8000,  // U+10A70...U+10A7F
        0x007f,  // U+10AF0...U+10AFF
        0xfe00,  // U+10B30...U+10B3F
        0x1e00,  // U+10B90...U+10B9F
        0x3f80,  // U+11040...U+1104F
        0xd800,  // U+110B0...U+110BF
        0x0003,  // U+110C0...U+110CF
        0x000f,  // U+11140...U+1114F
        0x0030,  // U+11170...U+1117F
        0x21e0,  // U+111C0...U+111CF
        0x3f00,  // U+11230...U+1123F
        0x0040,  // U+114C0...U+114CF
        0x03fe,  // U+115C0...U+115CF
        0x000e,  // U+11640...U+1164F
        0x001f,  // U+12470...U+1247F
        0xc000,  // U+16A60...U+16A6F
        0x0020,  // U+16AF0...U+16AFF
        0x0f80,  // U+16B30...U+16B3F
        0x0010,  // U+16B40...U+16B4F
        0x8000,  // U+1BC90...U+1BC9F
    ];

/// Reports whether the byte `c` is an ASCII punctuation character.
///
/// Bytes of 128 and above are never punctuation: their row index falls
/// outside the eight-entry ASCII table, so the checked lookup yields `false`.
pub fn is_ascii_punctuation(c: u8) -> bool {
    // High nibble selects the 16-code-point row, low nibble the bit within it.
    let row = usize::from(c >> 4);
    let bit = 1u16 << (c & 0x0f);
    PUNCT_MASKS_ASCII
        .get(row)
        .map_or(false, |&mask| (mask & bit) != 0)
}

/// Reports whether `c` belongs to the punctuation set described by this
/// module's tables.
///
/// ASCII characters are delegated to `is_ascii_punctuation`; everything else
/// is resolved by locating the character's 16-code-point row in `PUNCT_TAB`
/// and testing the corresponding bit of the parallel `PUNCT_MASKS` entry.
pub fn is_punctuation(c: char) -> bool {
    let code = u32::from(c);
    if code < 0x80 {
        return is_ascii_punctuation(code as u8);
    }
    // U+1BC9F is the last code point of the final row in `PUNCT_TAB`. Bailing
    // out here also keeps the row key below from being truncated to `u16`.
    if code > 0x1BC9F {
        return false;
    }
    let row = (code >> 4) as u16;
    let bit = 1u16 << (code & 0x0f);
    PUNCT_TAB
        .binary_search(&row)
        .map_or(false, |slot| (PUNCT_MASKS[slot] & bit) != 0)
}

#[cfg(test)]
mod tests {
    use super::{is_ascii_punctuation, is_punctuation};

    /// Spot-checks the byte classifier on punctuation, a space, a digit, a
    /// letter and a byte outside the ASCII range.
    #[test]
    fn test_ascii() {
        for &byte in &[b'!', b'@', b'~'] {
            assert!(
                is_ascii_punctuation(byte),
                "byte {:#04x} should be punctuation",
                byte
            );
        }
        for &byte in &[b' ', b'0', b'A', 0xA1] {
            assert!(
                !is_ascii_punctuation(byte),
                "byte {:#04x} should not be punctuation",
                byte
            );
        }
    }

    /// Spot-checks the `char` classifier, including the ASCII fast path and
    /// both sides of the upper bound at U+1BC9F.
    #[test]
    fn test_unicode() {
        for &ch in &['~', '\u{00A1}', '\u{060C}', '\u{FF65}', '\u{1BC9F}'] {
            assert!(
                is_punctuation(ch),
                "U+{:04X} should be punctuation",
                ch as u32
            );
        }
        for &ch in &[' ', '\u{1BCA0}'] {
            assert!(
                !is_punctuation(ch),
                "U+{:04X} should not be punctuation",
                ch as u32
            );
        }
    }
}