Source file
src/unicode/letter_test.go
Documentation: unicode
1
2
3
4
5 package unicode_test
6
7 import (
8 "flag"
9 "fmt"
10 "runtime"
11 "sort"
12 "strings"
13 "testing"
14 . "unicode"
15 )
16
// upperTest lists code points that the tests in this file expect both IsUpper
// and IsLetter to accept.
var upperTest = []rune{
	0x0041, 0x00C0, 0x00D8, 0x0100, 0x0139, 0x014A, 0x0178, 0x0181,
	0x0376, 0x03CF, 0x13BD, 0x1F2A, 0x2102, 0x2C00, 0x2C10, 0x2C20,
	0xA650, 0xA722, 0xFF3A,
	// Values that do not fit in 16 bits.
	0x10400, 0x1D400, 0x1D7CA,
}
41
// notupperTest lists code points that TestIsUpper expects IsUpper to reject.
var notupperTest = []rune{
	0x0040, 0x005B, 0x0061, 0x0185, 0x01B0, 0x0377, 0x0387, 0x2150,
	0xAB7D, 0xFFFF,
	// First value that does not fit in 16 bits.
	0x10000,
}
55
// letterTest lists code points that TestIsLetter expects IsLetter to accept.
// TestIsSpace reuses the same table as input that IsSpace must reject.
var letterTest = []rune{
	0x0041, 0x0061, 0x00AA, 0x00BA, 0x00C8, 0x00DB, 0x00F9, 0x02EC,
	0x0535, 0x0620, 0x06E6, 0x093D, 0x0A15, 0x0B99, 0x0DC0, 0x0EDD,
	0x1000, 0x1200, 0x1312, 0x1401, 0x2C00, 0xA800, 0xF900, 0xFA30,
	0xFFDA, 0xFFDC,
	// Values that do not fit in 16 bits.
	0x10000, 0x10300, 0x10400, 0x20000, 0x2F800, 0x2FA1D,
}
90
// notletterTest lists code points that the tests in this file expect both
// IsLetter and IsUpper to reject.
var notletterTest = []rune{
	0x0020, 0x0035, 0x0375, 0x0619, 0x0700, 0x1885, 0xFFFE,
	// Values that do not fit in 16 bits.
	0x1FFFF, 0x10FFFF,
}
102
103
// spaceTest lists code points that TestIsSpace expects IsSpace to accept.
var spaceTest = []rune{
	// ASCII white space.
	'\t', '\n', '\v', '\f', '\r', ' ',
	// Remaining entries in the Latin-1 range.
	0x0085, 0x00A0,
	// Entries above Latin-1.
	0x2000, 0x3000,
}
116
// caseT is a single case-mapping expectation: mapping in with the case
// selector cas must produce out.
type caseT struct {
	cas int  // case selector handed to To
	in  rune // code point to map
	out rune // expected result of the mapping
}
121
122 var caseTest = []caseT{
123
124 {-1, '\n', 0xFFFD},
125 {UpperCase, -1, -1},
126 {UpperCase, 1 << 30, 1 << 30},
127
128
129 {UpperCase, '\n', '\n'},
130 {UpperCase, 'a', 'A'},
131 {UpperCase, 'A', 'A'},
132 {UpperCase, '7', '7'},
133 {LowerCase, '\n', '\n'},
134 {LowerCase, 'a', 'a'},
135 {LowerCase, 'A', 'a'},
136 {LowerCase, '7', '7'},
137 {TitleCase, '\n', '\n'},
138 {TitleCase, 'a', 'A'},
139 {TitleCase, 'A', 'A'},
140 {TitleCase, '7', '7'},
141
142
143 {UpperCase, 0x80, 0x80},
144 {UpperCase, 'Å', 'Å'},
145 {UpperCase, 'å', 'Å'},
146 {LowerCase, 0x80, 0x80},
147 {LowerCase, 'Å', 'å'},
148 {LowerCase, 'å', 'å'},
149 {TitleCase, 0x80, 0x80},
150 {TitleCase, 'Å', 'Å'},
151 {TitleCase, 'å', 'Å'},
152
153
154 {UpperCase, 0x0131, 'I'},
155 {LowerCase, 0x0131, 0x0131},
156 {TitleCase, 0x0131, 'I'},
157
158
159 {UpperCase, 0x0133, 0x0132},
160 {LowerCase, 0x0133, 0x0133},
161 {TitleCase, 0x0133, 0x0132},
162
163
164 {UpperCase, 0x212A, 0x212A},
165 {LowerCase, 0x212A, 'k'},
166 {TitleCase, 0x212A, 0x212A},
167
168
169
170 {UpperCase, 0xA640, 0xA640},
171 {LowerCase, 0xA640, 0xA641},
172 {TitleCase, 0xA640, 0xA640},
173
174 {UpperCase, 0xA641, 0xA640},
175 {LowerCase, 0xA641, 0xA641},
176 {TitleCase, 0xA641, 0xA640},
177
178 {UpperCase, 0xA64E, 0xA64E},
179 {LowerCase, 0xA64E, 0xA64F},
180 {TitleCase, 0xA64E, 0xA64E},
181
182 {UpperCase, 0xA65F, 0xA65E},
183 {LowerCase, 0xA65F, 0xA65F},
184 {TitleCase, 0xA65F, 0xA65E},
185
186
187
188 {UpperCase, 0x0139, 0x0139},
189 {LowerCase, 0x0139, 0x013A},
190 {TitleCase, 0x0139, 0x0139},
191
192 {UpperCase, 0x013f, 0x013f},
193 {LowerCase, 0x013f, 0x0140},
194 {TitleCase, 0x013f, 0x013f},
195
196 {UpperCase, 0x0148, 0x0147},
197 {LowerCase, 0x0148, 0x0148},
198 {TitleCase, 0x0148, 0x0147},
199
200
201
202 {UpperCase, 0xab78, 0x13a8},
203 {LowerCase, 0xab78, 0xab78},
204 {TitleCase, 0xab78, 0x13a8},
205 {UpperCase, 0x13a8, 0x13a8},
206 {LowerCase, 0x13a8, 0xab78},
207 {TitleCase, 0x13a8, 0x13a8},
208
209
210
211 {UpperCase, 0x10400, 0x10400},
212 {LowerCase, 0x10400, 0x10428},
213 {TitleCase, 0x10400, 0x10400},
214
215 {UpperCase, 0x10427, 0x10427},
216 {LowerCase, 0x10427, 0x1044F},
217 {TitleCase, 0x10427, 0x10427},
218
219 {UpperCase, 0x10428, 0x10400},
220 {LowerCase, 0x10428, 0x10428},
221 {TitleCase, 0x10428, 0x10400},
222
223 {UpperCase, 0x1044F, 0x10427},
224 {LowerCase, 0x1044F, 0x1044F},
225 {TitleCase, 0x1044F, 0x10427},
226
227
228
229 {UpperCase, 0x10450, 0x10450},
230 {LowerCase, 0x10450, 0x10450},
231 {TitleCase, 0x10450, 0x10450},
232
233
234 {LowerCase, 0x2161, 0x2171},
235 {UpperCase, 0x0345, 0x0399},
236 }
237
238 func TestIsLetter(t *testing.T) {
239 for _, r := range upperTest {
240 if !IsLetter(r) {
241 t.Errorf("IsLetter(U+%04X) = false, want true", r)
242 }
243 }
244 for _, r := range letterTest {
245 if !IsLetter(r) {
246 t.Errorf("IsLetter(U+%04X) = false, want true", r)
247 }
248 }
249 for _, r := range notletterTest {
250 if IsLetter(r) {
251 t.Errorf("IsLetter(U+%04X) = true, want false", r)
252 }
253 }
254 }
255
256 func TestIsUpper(t *testing.T) {
257 for _, r := range upperTest {
258 if !IsUpper(r) {
259 t.Errorf("IsUpper(U+%04X) = false, want true", r)
260 }
261 }
262 for _, r := range notupperTest {
263 if IsUpper(r) {
264 t.Errorf("IsUpper(U+%04X) = true, want false", r)
265 }
266 }
267 for _, r := range notletterTest {
268 if IsUpper(r) {
269 t.Errorf("IsUpper(U+%04X) = true, want false", r)
270 }
271 }
272 }
273
// caseString returns the name of the case selector c for use in failure
// messages. Any value other than UpperCase, LowerCase or TitleCase yields
// "ErrorCase".
func caseString(c int) string {
	names := [...]string{
		UpperCase: "UpperCase",
		LowerCase: "LowerCase",
		TitleCase: "TitleCase",
	}
	if 0 <= c && c < len(names) {
		return names[c]
	}
	return "ErrorCase"
}
285
286 func TestTo(t *testing.T) {
287 for _, c := range caseTest {
288 r := To(c.cas, c.in)
289 if c.out != r {
290 t.Errorf("To(U+%04X, %s) = U+%04X want U+%04X", c.in, caseString(c.cas), r, c.out)
291 }
292 }
293 }
294
295 func TestToUpperCase(t *testing.T) {
296 for _, c := range caseTest {
297 if c.cas != UpperCase {
298 continue
299 }
300 r := ToUpper(c.in)
301 if c.out != r {
302 t.Errorf("ToUpper(U+%04X) = U+%04X want U+%04X", c.in, r, c.out)
303 }
304 }
305 }
306
307 func TestToLowerCase(t *testing.T) {
308 for _, c := range caseTest {
309 if c.cas != LowerCase {
310 continue
311 }
312 r := ToLower(c.in)
313 if c.out != r {
314 t.Errorf("ToLower(U+%04X) = U+%04X want U+%04X", c.in, r, c.out)
315 }
316 }
317 }
318
319 func TestToTitleCase(t *testing.T) {
320 for _, c := range caseTest {
321 if c.cas != TitleCase {
322 continue
323 }
324 r := ToTitle(c.in)
325 if c.out != r {
326 t.Errorf("ToTitle(U+%04X) = U+%04X want U+%04X", c.in, r, c.out)
327 }
328 }
329 }
330
331 func TestIsSpace(t *testing.T) {
332 for _, c := range spaceTest {
333 if !IsSpace(c) {
334 t.Errorf("IsSpace(U+%04X) = false; want true", c)
335 }
336 }
337 for _, c := range letterTest {
338 if IsSpace(c) {
339 t.Errorf("IsSpace(U+%04X) = true; want false", c)
340 }
341 }
342 }
343
344
345
// TestLetterOptimizations compares, for every code point from 0 through
// MaxLatin1, each dedicated helper (IsLetter, ToUpper, ...) with the
// equivalent call to the general Is or To function.
func TestLetterOptimizations(t *testing.T) {
	for r := rune(0); r <= MaxLatin1; r++ {
		// Classification helpers against Is with the matching table.
		if direct, general := IsLetter(r), Is(Letter, r); direct != general {
			t.Errorf("IsLetter(U+%04X) disagrees with Is(Letter)", r)
		}
		if direct, general := IsUpper(r), Is(Upper, r); direct != general {
			t.Errorf("IsUpper(U+%04X) disagrees with Is(Upper)", r)
		}
		if direct, general := IsLower(r), Is(Lower, r); direct != general {
			t.Errorf("IsLower(U+%04X) disagrees with Is(Lower)", r)
		}
		if direct, general := IsTitle(r), Is(Title, r); direct != general {
			t.Errorf("IsTitle(U+%04X) disagrees with Is(Title)", r)
		}
		if direct, general := IsSpace(r), Is(White_Space, r); direct != general {
			t.Errorf("IsSpace(U+%04X) disagrees with Is(White_Space)", r)
		}

		// Mapping helpers against To with the matching case selector.
		if direct, general := ToUpper(r), To(UpperCase, r); direct != general {
			t.Errorf("ToUpper(U+%04X) disagrees with To(Upper)", r)
		}
		if direct, general := ToLower(r), To(LowerCase, r); direct != general {
			t.Errorf("ToLower(U+%04X) disagrees with To(Lower)", r)
		}
		if direct, general := ToTitle(r), To(TitleCase, r); direct != general {
			t.Errorf("ToTitle(U+%04X) disagrees with To(Title)", r)
		}
	}
}
374
// TestTurkishCase checks the TurkishCase special-case mapping on two parallel
// alphabets: lower[i] and upper[i] are expected to be each other's lower- and
// upper-case form, and the title-case form is expected to equal the
// upper-case one.
func TestTurkishCase(t *testing.T) {
	lower := []rune("abcçdefgğhıijklmnoöprsştuüvyz")
	upper := []rune("ABCÇDEFGĞHIİJKLMNOÖPRSŞTUÜVYZ")
	// The two alphabets are indexed in parallel, so they must be the same
	// length; fail clearly instead of panicking on upper[i].
	if len(lower) != len(upper) {
		t.Fatalf("alphabet length mismatch: %d lower, %d upper", len(lower), len(upper))
	}
	for i, l := range lower {
		u := upper[i]
		// Each check reports exactly the value it compared, so a failure
		// message always shows the result of the call that failed.
		if got := TurkishCase.ToLower(l); got != l {
			t.Errorf("lower(U+%04X) is U+%04X not U+%04X", l, got, l)
		}
		if got := TurkishCase.ToUpper(u); got != u {
			t.Errorf("upper(U+%04X) is U+%04X not U+%04X", u, got, u)
		}
		if got := TurkishCase.ToUpper(l); got != u {
			t.Errorf("upper(U+%04X) is U+%04X not U+%04X", l, got, u)
		}
		if got := TurkishCase.ToLower(u); got != l {
			t.Errorf("lower(U+%04X) is U+%04X not U+%04X", u, got, l)
		}
		if got := TurkishCase.ToTitle(u); got != u {
			t.Errorf("title(U+%04X) is U+%04X not U+%04X", u, got, u)
		}
		if got := TurkishCase.ToTitle(l); got != u {
			t.Errorf("title(U+%04X) is U+%04X not U+%04X", l, got, u)
		}
	}
}
400
// simpleFoldTests lists SimpleFold orbits, one per string. TestSimpleFold
// requires SimpleFold of each rune to be the next rune in the string, and
// SimpleFold of the last rune to be the first.
//
// Every non-ASCII code point is written as an escape. Several of them look
// identical to other characters or are replaced by them when text is
// normalized, and one is a combining mark that would attach to the opening
// quote; escapes keep the table exact and readable.
var simpleFoldTests = []string{
	// Two-element orbits: ASCII A/a, and U+03B4 with U+0394.
	"Aa",
	"\u03B4\u0394",

	// ASCII letters whose orbit has a third, non-ASCII member
	// (U+212A for K/k, U+017F for S/s).
	"Kk\u212A",
	"Ss\u017F",

	// Non-ASCII orbits with three and four members.
	"\u03C1\u03F1\u03A1",
	"\u0345\u0399\u03B9\u1FBE",

	// Single-rune entries: SimpleFold must return the rune itself.
	"\u0130",
	"\u0131",

	// A pair in which the first member is the lower code point.
	"\u13B0\uAB80",
}
424
425 func TestSimpleFold(t *testing.T) {
426 for _, tt := range simpleFoldTests {
427 cycle := []rune(tt)
428 r := cycle[len(cycle)-1]
429 for _, out := range cycle {
430 if r := SimpleFold(r); r != out {
431 t.Errorf("SimpleFold(%#U) = %#U, want %#U", r, r, out)
432 }
433 r = out
434 }
435 }
436
437 if r := SimpleFold(-42); r != -42 {
438 t.Errorf("SimpleFold(-42) = %v, want -42", r)
439 }
440 }
441
442
443
444
445
446
447
448
449
450
451
452 var calibrate = flag.Bool("calibrate", false, "compute crossover for linear vs. binary search")
453
454 func TestCalibrate(t *testing.T) {
455 if !*calibrate {
456 return
457 }
458
459 if runtime.GOARCH == "amd64" {
460 fmt.Printf("warning: running calibration on %s\n", runtime.GOARCH)
461 }
462
463
464
465
466
467 n := sort.Search(64, func(n int) bool {
468 tab := fakeTable(n)
469 blinear := func(b *testing.B) {
470 tab := tab
471 max := n*5 + 20
472 for i := 0; i < b.N; i++ {
473 for j := 0; j <= max; j++ {
474 linear(tab, uint16(j))
475 }
476 }
477 }
478 bbinary := func(b *testing.B) {
479 tab := tab
480 max := n*5 + 20
481 for i := 0; i < b.N; i++ {
482 for j := 0; j <= max; j++ {
483 binary(tab, uint16(j))
484 }
485 }
486 }
487 bmlinear := testing.Benchmark(blinear)
488 bmbinary := testing.Benchmark(bbinary)
489 fmt.Printf("n=%d: linear=%d binary=%d\n", n, bmlinear.NsPerOp(), bmbinary.NsPerOp())
490 return bmlinear.NsPerOp()*100 > bmbinary.NsPerOp()*110
491 })
492 fmt.Printf("calibration: linear cutoff = %d\n", n)
493 }
494
// fakeTable builds n synthetic ranges for the calibration benchmark. Range i
// covers i*5+10 through i*5+12 with stride 1. The result is nil when n is not
// positive.
func fakeTable(n int) []Range16 {
	var table []Range16
	for lo := 10; len(table) < n; lo += 5 {
		table = append(table, Range16{Lo: uint16(lo), Hi: uint16(lo + 2), Stride: 1})
	}
	return table
}
502
// linear reports whether r is a member of ranges, scanning the ranges in
// order and stopping at the first range that starts above r. A value inside
// a range is a member only if its offset from Lo is a multiple of Stride.
func linear(ranges []Range16, r uint16) bool {
	for i := range ranges {
		cur := &ranges[i]
		switch {
		case r < cur.Lo:
			return false
		case r <= cur.Hi:
			return (r-cur.Lo)%cur.Stride == 0
		}
	}
	return false
}
515
// binary reports whether r is a member of ranges, using binary search over
// the slice. A value inside a range is a member only if its offset from Lo is
// a multiple of Stride.
func binary(ranges []Range16, r uint16) bool {
	low, high := 0, len(ranges)
	for low < high {
		// Unsigned halving of the sum gives the midpoint of [low, high).
		mid := int(uint(low+high) >> 1)
		cur := &ranges[mid]
		switch {
		case r < cur.Lo:
			high = mid
		case r > cur.Hi:
			low = mid + 1
		default:
			return (r-cur.Lo)%cur.Stride == 0
		}
	}
	return false
}
534
// TestLatinOffset checks, for every table in the exported table maps, that
// LatinOffset equals the number of leading R16 entries whose Hi does not
// exceed MaxLatin1.
func TestLatinOffset(t *testing.T) {
	tableSets := []map[string]*RangeTable{
		Categories,
		FoldCategory,
		FoldScript,
		Properties,
		Scripts,
	}
	for _, set := range tableSets {
		for name, tab := range set {
			want := 0
			for _, r16 := range tab.R16 {
				if r16.Hi > MaxLatin1 {
					break
				}
				want++
			}
			if tab.LatinOffset != want {
				t.Errorf("%s: LatinOffset=%d, want %d", name, tab.LatinOffset, want)
			}
		}
	}
}
555
// TestSpecialCaseNoMapping checks that a SpecialCase entry for 'A' whose
// deltas are all zero leaves 'A' unchanged in strings.ToLowerSpecial, while
// the other letters are still lower-cased.
func TestSpecialCaseNoMapping(t *testing.T) {
	// Delta is left at its zero value: no offset for any of the cases.
	keepCapitalA := SpecialCase{{Lo: 'A', Hi: 'A'}}
	const want = "Abc"
	if got := strings.ToLowerSpecial(keepCapitalA, "ABC"); got != want {
		t.Errorf("got %q; want %q", got, want)
	}
}
566
// TestNegativeRune checks that the classification functions reject negative
// runes. Each probe is a valid code point with 1<<31 subtracted: the rune is
// negative, but its low bits still equal the code point, so it would be
// misclassified by code that truncates the rune before looking it up.
//
// The probes are every value from 0 through MaxLatin1 inclusive, followed by
// a few values above that range.
func TestNegativeRune(t *testing.T) {
	// Representative code points above Latin-1, one per kind of character.
	nonLatin1 := []uint32{
		0x0100, // upper-case letter
		0x0101, // lower-case letter
		0x01C5, // title-case letter
		0x0300, // mark
		0x0660, // decimal digit
		0x037E, // punctuation
		0x02C2, // symbol
		0x1680, // space
	}

	// call is the text of the call up to the rune argument; the failure
	// message appends the argument and the closing parenthesis.
	predicates := []struct {
		call string
		fn   func(rune) bool
	}{
		{"Is(Letter, ", func(r rune) bool { return Is(Letter, r) }},
		{"IsControl(", IsControl},
		{"IsDigit(", IsDigit},
		{"IsGraphic(", IsGraphic},
		{"IsLetter(", IsLetter},
		{"IsLower(", IsLower},
		{"IsMark(", IsMark},
		{"IsNumber(", IsNumber},
		{"IsPrint(", IsPrint},
		{"IsPunct(", IsPunct},
		{"IsSpace(", IsSpace},
		{"IsSymbol(", IsSymbol},
		{"IsTitle(", IsTitle},
		{"IsUpper(", IsUpper},
	}

	// Build the probe list explicitly so that MaxLatin1 itself is included
	// before the values above Latin-1 start.
	bases := make([]uint32, 0, MaxLatin1+1+len(nonLatin1))
	for b := uint32(0); b <= MaxLatin1; b++ {
		bases = append(bases, b)
	}
	bases = append(bases, nonLatin1...)

	for _, base := range bases {
		// The uint32 subtraction wraps; converting the result to rune gives
		// the negative value base - 1<<31.
		r := rune(base - 1<<31)
		for _, p := range predicates {
			if p.fn(r) {
				t.Errorf("%s0x%x - 1<<31) = true, want false", p.call, base)
			}
		}
	}
}
645
View as plain text