Source file
    src/unicode/letter_test.go
  
  
    Documentation: unicode
  
     1  
     2  
     3  
     4  
     5  package unicode_test
     6  
     7  import (
     8  	"flag"
     9  	"fmt"
    10  	"runtime"
    11  	"sort"
    12  	"strings"
    13  	"testing"
    14  	. "unicode"
    15  )
    16  
    17  var upperTest = []rune{
    18  	0x41,
    19  	0xc0,
    20  	0xd8,
    21  	0x100,
    22  	0x139,
    23  	0x14a,
    24  	0x178,
    25  	0x181,
    26  	0x376,
    27  	0x3cf,
    28  	0x13bd,
    29  	0x1f2a,
    30  	0x2102,
    31  	0x2c00,
    32  	0x2c10,
    33  	0x2c20,
    34  	0xa650,
    35  	0xa722,
    36  	0xff3a,
    37  	0x10400,
    38  	0x1d400,
    39  	0x1d7ca,
    40  }
    41  
    42  var notupperTest = []rune{
    43  	0x40,
    44  	0x5b,
    45  	0x61,
    46  	0x185,
    47  	0x1b0,
    48  	0x377,
    49  	0x387,
    50  	0x2150,
    51  	0xab7d,
    52  	0xffff,
    53  	0x10000,
    54  }
    55  
    56  var letterTest = []rune{
    57  	0x41,
    58  	0x61,
    59  	0xaa,
    60  	0xba,
    61  	0xc8,
    62  	0xdb,
    63  	0xf9,
    64  	0x2ec,
    65  	0x535,
    66  	0x620,
    67  	0x6e6,
    68  	0x93d,
    69  	0xa15,
    70  	0xb99,
    71  	0xdc0,
    72  	0xedd,
    73  	0x1000,
    74  	0x1200,
    75  	0x1312,
    76  	0x1401,
    77  	0x2c00,
    78  	0xa800,
    79  	0xf900,
    80  	0xfa30,
    81  	0xffda,
    82  	0xffdc,
    83  	0x10000,
    84  	0x10300,
    85  	0x10400,
    86  	0x20000,
    87  	0x2f800,
    88  	0x2fa1d,
    89  }
    90  
    91  var notletterTest = []rune{
    92  	0x20,
    93  	0x35,
    94  	0x375,
    95  	0x619,
    96  	0x700,
    97  	0x1885,
    98  	0xfffe,
    99  	0x1ffff,
   100  	0x10ffff,
   101  }
   102  
   103  
   104  var spaceTest = []rune{
   105  	0x09,
   106  	0x0a,
   107  	0x0b,
   108  	0x0c,
   109  	0x0d,
   110  	0x20,
   111  	0x85,
   112  	0xA0,
   113  	0x2000,
   114  	0x3000,
   115  }
   116  
   117  type caseT struct {
   118  	cas     int
   119  	in, out rune
   120  }
   121  
// caseTest is the table shared by TestTo, TestToUpperCase, TestToLowerCase
// and TestToTitleCase: mapping rune in with case selector cas must yield out.
var caseTest = []caseT{
	// Invalid arguments: an unknown case selector, a negative rune and a
	// rune far above the Unicode range.
	{-1, '\n', 0xFFFD},
	{UpperCase, -1, -1},
	{UpperCase, 1 << 30, 1 << 30},

	// ASCII: a control character, both letter cases and a digit.
	{UpperCase, '\n', '\n'},
	{UpperCase, 'a', 'A'},
	{UpperCase, 'A', 'A'},
	{UpperCase, '7', '7'},
	{LowerCase, '\n', '\n'},
	{LowerCase, 'a', 'a'},
	{LowerCase, 'A', 'a'},
	{LowerCase, '7', '7'},
	{TitleCase, '\n', '\n'},
	{TitleCase, 'a', 'A'},
	{TitleCase, 'A', 'A'},
	{TitleCase, '7', '7'},

	// Latin-1 above ASCII: an uncased code point and a cased letter pair.
	{UpperCase, 0x80, 0x80},
	{UpperCase, 'Å', 'Å'},
	{UpperCase, 'å', 'Å'},
	{LowerCase, 0x80, 0x80},
	{LowerCase, 'Å', 'å'},
	{LowerCase, 'å', 'å'},
	{TitleCase, 0x80, 0x80},
	{TitleCase, 'Å', 'Å'},
	{TitleCase, 'å', 'Å'},

	// U+0131 LATIN SMALL LETTER DOTLESS I: upper and title map to ASCII 'I'.
	{UpperCase, 0x0131, 'I'},
	{LowerCase, 0x0131, 0x0131},
	{TitleCase, 0x0131, 'I'},

	// U+0133 LATIN SMALL LIGATURE IJ: upper and title map to U+0132.
	{UpperCase, 0x0133, 0x0132},
	{LowerCase, 0x0133, 0x0133},
	{TitleCase, 0x0133, 0x0132},

	// U+212A KELVIN SIGN: lower maps to ASCII 'k'.
	{UpperCase, 0x212A, 0x212A},
	{LowerCase, 0x212A, 'k'},
	{TitleCase, 0x212A, 0x212A},

	// A run of alternating upper/lower pairs with the upper-case letter on
	// the even code point.
	// U+A640 CYRILLIC CAPITAL LETTER ZEMLYA.
	{UpperCase, 0xA640, 0xA640},
	{LowerCase, 0xA640, 0xA641},
	{TitleCase, 0xA640, 0xA640},
	// U+A641 CYRILLIC SMALL LETTER ZEMLYA.
	{UpperCase, 0xA641, 0xA640},
	{LowerCase, 0xA641, 0xA641},
	{TitleCase, 0xA641, 0xA640},
	// U+A64E CYRILLIC CAPITAL LETTER NEUTRAL YAT.
	{UpperCase, 0xA64E, 0xA64E},
	{LowerCase, 0xA64E, 0xA64F},
	{TitleCase, 0xA64E, 0xA64E},
	// U+A65F CYRILLIC SMALL LETTER YN.
	{UpperCase, 0xA65F, 0xA65E},
	{LowerCase, 0xA65F, 0xA65F},
	{TitleCase, 0xA65F, 0xA65E},

	// Another alternating run, this time with the upper-case letter on the
	// odd code point.
	// U+0139 LATIN CAPITAL LETTER L WITH ACUTE.
	{UpperCase, 0x0139, 0x0139},
	{LowerCase, 0x0139, 0x013A},
	{TitleCase, 0x0139, 0x0139},
	// U+013F LATIN CAPITAL LETTER L WITH MIDDLE DOT.
	{UpperCase, 0x013f, 0x013f},
	{LowerCase, 0x013f, 0x0140},
	{TitleCase, 0x013f, 0x013f},
	// U+0148 LATIN SMALL LETTER N WITH CARON.
	{UpperCase, 0x0148, 0x0147},
	{LowerCase, 0x0148, 0x0148},
	{TitleCase, 0x0148, 0x0147},

	// Cherokee: the lower-case letter (U+AB78) has a larger code point than
	// its upper-case partner (U+13A8).
	// U+AB78 CHEROKEE SMALL LETTER GE, then U+13A8 CHEROKEE LETTER GE.
	{UpperCase, 0xab78, 0x13a8},
	{LowerCase, 0xab78, 0xab78},
	{TitleCase, 0xab78, 0x13a8},
	{UpperCase, 0x13a8, 0x13a8},
	{LowerCase, 0x13a8, 0xab78},
	{TitleCase, 0x13a8, 0x13a8},

	// Deseret, above U+FFFF: capitals U+10400..U+10427 pair with small
	// letters U+10428..U+1044F.
	// U+10400 DESERET CAPITAL LETTER LONG I (first capital).
	{UpperCase, 0x10400, 0x10400},
	{LowerCase, 0x10400, 0x10428},
	{TitleCase, 0x10400, 0x10400},
	// U+10427 DESERET CAPITAL LETTER EW (last capital).
	{UpperCase, 0x10427, 0x10427},
	{LowerCase, 0x10427, 0x1044F},
	{TitleCase, 0x10427, 0x10427},
	// U+10428 DESERET SMALL LETTER LONG I (first small letter).
	{UpperCase, 0x10428, 0x10400},
	{LowerCase, 0x10428, 0x10428},
	{TitleCase, 0x10428, 0x10400},
	// U+1044F DESERET SMALL LETTER EW (last small letter).
	{UpperCase, 0x1044F, 0x10427},
	{LowerCase, 0x1044F, 0x1044F},
	{TitleCase, 0x1044F, 0x10427},

	// The code point right after the Deseret small letters has no case
	// mapping at all.
	// U+10450 SHAVIAN LETTER PEEP.
	{UpperCase, 0x10450, 0x10450},
	{LowerCase, 0x10450, 0x10450},
	{TitleCase, 0x10450, 0x10450},

	// Non-letters that still have case mappings: U+2161 ROMAN NUMERAL TWO
	// and U+0345 COMBINING GREEK YPOGEGRAMMENI.
	{LowerCase, 0x2161, 0x2171},
	{UpperCase, 0x0345, 0x0399},
}
   237  
   238  func TestIsLetter(t *testing.T) {
   239  	for _, r := range upperTest {
   240  		if !IsLetter(r) {
   241  			t.Errorf("IsLetter(U+%04X) = false, want true", r)
   242  		}
   243  	}
   244  	for _, r := range letterTest {
   245  		if !IsLetter(r) {
   246  			t.Errorf("IsLetter(U+%04X) = false, want true", r)
   247  		}
   248  	}
   249  	for _, r := range notletterTest {
   250  		if IsLetter(r) {
   251  			t.Errorf("IsLetter(U+%04X) = true, want false", r)
   252  		}
   253  	}
   254  }
   255  
   256  func TestIsUpper(t *testing.T) {
   257  	for _, r := range upperTest {
   258  		if !IsUpper(r) {
   259  			t.Errorf("IsUpper(U+%04X) = false, want true", r)
   260  		}
   261  	}
   262  	for _, r := range notupperTest {
   263  		if IsUpper(r) {
   264  			t.Errorf("IsUpper(U+%04X) = true, want false", r)
   265  		}
   266  	}
   267  	for _, r := range notletterTest {
   268  		if IsUpper(r) {
   269  			t.Errorf("IsUpper(U+%04X) = true, want false", r)
   270  		}
   271  	}
   272  }
   273  
   274  func caseString(c int) string {
   275  	switch c {
   276  	case UpperCase:
   277  		return "UpperCase"
   278  	case LowerCase:
   279  		return "LowerCase"
   280  	case TitleCase:
   281  		return "TitleCase"
   282  	}
   283  	return "ErrorCase"
   284  }
   285  
   286  func TestTo(t *testing.T) {
   287  	for _, c := range caseTest {
   288  		r := To(c.cas, c.in)
   289  		if c.out != r {
   290  			t.Errorf("To(U+%04X, %s) = U+%04X want U+%04X", c.in, caseString(c.cas), r, c.out)
   291  		}
   292  	}
   293  }
   294  
   295  func TestToUpperCase(t *testing.T) {
   296  	for _, c := range caseTest {
   297  		if c.cas != UpperCase {
   298  			continue
   299  		}
   300  		r := ToUpper(c.in)
   301  		if c.out != r {
   302  			t.Errorf("ToUpper(U+%04X) = U+%04X want U+%04X", c.in, r, c.out)
   303  		}
   304  	}
   305  }
   306  
   307  func TestToLowerCase(t *testing.T) {
   308  	for _, c := range caseTest {
   309  		if c.cas != LowerCase {
   310  			continue
   311  		}
   312  		r := ToLower(c.in)
   313  		if c.out != r {
   314  			t.Errorf("ToLower(U+%04X) = U+%04X want U+%04X", c.in, r, c.out)
   315  		}
   316  	}
   317  }
   318  
   319  func TestToTitleCase(t *testing.T) {
   320  	for _, c := range caseTest {
   321  		if c.cas != TitleCase {
   322  			continue
   323  		}
   324  		r := ToTitle(c.in)
   325  		if c.out != r {
   326  			t.Errorf("ToTitle(U+%04X) = U+%04X want U+%04X", c.in, r, c.out)
   327  		}
   328  	}
   329  }
   330  
   331  func TestIsSpace(t *testing.T) {
   332  	for _, c := range spaceTest {
   333  		if !IsSpace(c) {
   334  			t.Errorf("IsSpace(U+%04X) = false; want true", c)
   335  		}
   336  	}
   337  	for _, c := range letterTest {
   338  		if IsSpace(c) {
   339  			t.Errorf("IsSpace(U+%04X) = true; want false", c)
   340  		}
   341  	}
   342  }
   343  
   344  
   345  
   346  func TestLetterOptimizations(t *testing.T) {
   347  	for i := rune(0); i <= MaxLatin1; i++ {
   348  		if Is(Letter, i) != IsLetter(i) {
   349  			t.Errorf("IsLetter(U+%04X) disagrees with Is(Letter)", i)
   350  		}
   351  		if Is(Upper, i) != IsUpper(i) {
   352  			t.Errorf("IsUpper(U+%04X) disagrees with Is(Upper)", i)
   353  		}
   354  		if Is(Lower, i) != IsLower(i) {
   355  			t.Errorf("IsLower(U+%04X) disagrees with Is(Lower)", i)
   356  		}
   357  		if Is(Title, i) != IsTitle(i) {
   358  			t.Errorf("IsTitle(U+%04X) disagrees with Is(Title)", i)
   359  		}
   360  		if Is(White_Space, i) != IsSpace(i) {
   361  			t.Errorf("IsSpace(U+%04X) disagrees with Is(White_Space)", i)
   362  		}
   363  		if To(UpperCase, i) != ToUpper(i) {
   364  			t.Errorf("ToUpper(U+%04X) disagrees with To(Upper)", i)
   365  		}
   366  		if To(LowerCase, i) != ToLower(i) {
   367  			t.Errorf("ToLower(U+%04X) disagrees with To(Lower)", i)
   368  		}
   369  		if To(TitleCase, i) != ToTitle(i) {
   370  			t.Errorf("ToTitle(U+%04X) disagrees with To(Title)", i)
   371  		}
   372  	}
   373  }
   374  
   375  func TestTurkishCase(t *testing.T) {
   376  	lower := []rune("abcçdefgğhıijklmnoöprsştuüvyz")
   377  	upper := []rune("ABCÇDEFGĞHIİJKLMNOÖPRSŞTUÜVYZ")
   378  	for i, l := range lower {
   379  		u := upper[i]
   380  		if TurkishCase.ToLower(l) != l {
   381  			t.Errorf("lower(U+%04X) is U+%04X not U+%04X", l, TurkishCase.ToLower(l), l)
   382  		}
   383  		if TurkishCase.ToUpper(u) != u {
   384  			t.Errorf("upper(U+%04X) is U+%04X not U+%04X", u, TurkishCase.ToUpper(u), u)
   385  		}
   386  		if TurkishCase.ToUpper(l) != u {
   387  			t.Errorf("upper(U+%04X) is U+%04X not U+%04X", l, TurkishCase.ToUpper(l), u)
   388  		}
   389  		if TurkishCase.ToLower(u) != l {
   390  			t.Errorf("lower(U+%04X) is U+%04X not U+%04X", u, TurkishCase.ToLower(l), l)
   391  		}
   392  		if TurkishCase.ToTitle(u) != u {
   393  			t.Errorf("title(U+%04X) is U+%04X not U+%04X", u, TurkishCase.ToTitle(u), u)
   394  		}
   395  		if TurkishCase.ToTitle(l) != u {
   396  			t.Errorf("title(U+%04X) is U+%04X not U+%04X", l, TurkishCase.ToTitle(l), u)
   397  		}
   398  	}
   399  }
   400  
// simpleFoldTests lists case-folding orbits. TestSimpleFold treats each
// string as a cycle: SimpleFold of every rune must yield the next rune in
// the string, and SimpleFold of the last rune must wrap to the first.
var simpleFoldTests = []string{
	// Several entries contain look-alike code points; edit them with care.

	// Plain upper/lower pairs.
	"Aa",
	"δΔ",

	// ASCII letters whose orbit has a third, non-ASCII member.
	"KkK",
	"Ssſ",

	// Non-ASCII orbits with more than two members.
	"ρϱΡ",
	"ͅΙιι",

	// Single-rune orbits: SimpleFold must return the rune itself.
	"İ",
	"ı",

	// A pair written with escapes; the first rune has the smaller code point.
	"\u13b0\uab80",
}
   424  
   425  func TestSimpleFold(t *testing.T) {
   426  	for _, tt := range simpleFoldTests {
   427  		cycle := []rune(tt)
   428  		r := cycle[len(cycle)-1]
   429  		for _, out := range cycle {
   430  			if r := SimpleFold(r); r != out {
   431  				t.Errorf("SimpleFold(%#U) = %#U, want %#U", r, r, out)
   432  			}
   433  			r = out
   434  		}
   435  	}
   436  
   437  	if r := SimpleFold(-42); r != -42 {
   438  		t.Errorf("SimpleFold(-42) = %v, want -42", r)
   439  	}
   440  }
   441  
   442  
   443  
   444  
   445  
   446  
   447  
   448  
   449  
   450  
   451  
// calibrate gates TestCalibrate, which returns immediately unless the test
// binary is run with -calibrate.
var calibrate = flag.Bool("calibrate", false, "compute crossover for linear vs. binary search")
   453  
// TestCalibrate estimates the table size at which binary search over a
// Range16 table starts to beat linear search. It does nothing unless the
// -calibrate flag is set, and it prints its measurements to standard output
// instead of reporting through t.
func TestCalibrate(t *testing.T) {
	if !*calibrate {
		return
	}

	// NOTE(review): the warning is printed only when GOARCH is amd64; confirm
	// that this is the intended condition.
	if runtime.GOARCH == "amd64" {
		fmt.Printf("warning: running calibration on %s\n", runtime.GOARCH)
	}

	// Find the smallest table size n in [0, 64) at which linear search is
	// more than 10% slower than binary search. sort.Search bisects, so this
	// relies on the predicate staying true once it first becomes true.
	// Both closures probe every value from 0 through n*5+20, which covers
	// values below, inside, between and above the ranges built by fakeTable.
	n := sort.Search(64, func(n int) bool {
		tab := fakeTable(n)
		blinear := func(b *testing.B) {
			tab := tab
			max := n*5 + 20
			for i := 0; i < b.N; i++ {
				for j := 0; j <= max; j++ {
					linear(tab, uint16(j))
				}
			}
		}
		bbinary := func(b *testing.B) {
			tab := tab
			max := n*5 + 20
			for i := 0; i < b.N; i++ {
				for j := 0; j <= max; j++ {
					binary(tab, uint16(j))
				}
			}
		}
		bmlinear := testing.Benchmark(blinear)
		bmbinary := testing.Benchmark(bbinary)
		fmt.Printf("n=%d: linear=%d binary=%d\n", n, bmlinear.NsPerOp(), bmbinary.NsPerOp())
		// True when linear time exceeds binary time by more than 10%.
		return bmlinear.NsPerOp()*100 > bmbinary.NsPerOp()*110
	})
	fmt.Printf("calibration: linear cutoff = %d\n", n)
}
   494  
   495  func fakeTable(n int) []Range16 {
   496  	var r16 []Range16
   497  	for i := 0; i < n; i++ {
   498  		r16 = append(r16, Range16{uint16(i*5 + 10), uint16(i*5 + 12), 1})
   499  	}
   500  	return r16
   501  }
   502  
   503  func linear(ranges []Range16, r uint16) bool {
   504  	for i := range ranges {
   505  		range_ := &ranges[i]
   506  		if r < range_.Lo {
   507  			return false
   508  		}
   509  		if r <= range_.Hi {
   510  			return (r-range_.Lo)%range_.Stride == 0
   511  		}
   512  	}
   513  	return false
   514  }
   515  
   516  func binary(ranges []Range16, r uint16) bool {
   517  	
   518  	lo := 0
   519  	hi := len(ranges)
   520  	for lo < hi {
   521  		m := int(uint(lo+hi) >> 1)
   522  		range_ := &ranges[m]
   523  		if range_.Lo <= r && r <= range_.Hi {
   524  			return (r-range_.Lo)%range_.Stride == 0
   525  		}
   526  		if r < range_.Lo {
   527  			hi = m
   528  		} else {
   529  			lo = m + 1
   530  		}
   531  	}
   532  	return false
   533  }
   534  
   535  func TestLatinOffset(t *testing.T) {
   536  	var maps = []map[string]*RangeTable{
   537  		Categories,
   538  		FoldCategory,
   539  		FoldScript,
   540  		Properties,
   541  		Scripts,
   542  	}
   543  	for _, m := range maps {
   544  		for name, tab := range m {
   545  			i := 0
   546  			for i < len(tab.R16) && tab.R16[i].Hi <= MaxLatin1 {
   547  				i++
   548  			}
   549  			if tab.LatinOffset != i {
   550  				t.Errorf("%s: LatinOffset=%d, want %d", name, tab.LatinOffset, i)
   551  			}
   552  		}
   553  	}
   554  }
   555  
   556  func TestSpecialCaseNoMapping(t *testing.T) {
   557  	
   558  	
   559  	var noChangeForCapitalA = CaseRange{'A', 'A', [MaxCase]rune{0, 0, 0}}
   560  	got := strings.ToLowerSpecial(SpecialCase([]CaseRange{noChangeForCapitalA}), "ABC")
   561  	want := "Abc"
   562  	if got != want {
   563  		t.Errorf("got %q; want %q", got, want)
   564  	}
   565  }
   566  
   567  func TestNegativeRune(t *testing.T) {
   568  	
   569  	
   570  	
   571  	
   572  	
   573  	
   574  	nonLatin1 := []uint32{
   575  		
   576  		0x0100,
   577  		
   578  		0x0101,
   579  		
   580  		0x01C5,
   581  		
   582  		0x0300,
   583  		
   584  		0x0660,
   585  		
   586  		0x037E,
   587  		
   588  		0x02C2,
   589  		
   590  		0x1680,
   591  	}
   592  	for i := 0; i < MaxLatin1+len(nonLatin1); i++ {
   593  		base := uint32(i)
   594  		if i >= MaxLatin1 {
   595  			base = nonLatin1[i-MaxLatin1]
   596  		}
   597  
   598  		
   599  		
   600  		r := rune(base - 1<<31)
   601  		if Is(Letter, r) {
   602  			t.Errorf("Is(Letter, 0x%x - 1<<31) = true, want false", base)
   603  		}
   604  		if IsControl(r) {
   605  			t.Errorf("IsControl(0x%x - 1<<31) = true, want false", base)
   606  		}
   607  		if IsDigit(r) {
   608  			t.Errorf("IsDigit(0x%x - 1<<31) = true, want false", base)
   609  		}
   610  		if IsGraphic(r) {
   611  			t.Errorf("IsGraphic(0x%x - 1<<31) = true, want false", base)
   612  		}
   613  		if IsLetter(r) {
   614  			t.Errorf("IsLetter(0x%x - 1<<31) = true, want false", base)
   615  		}
   616  		if IsLower(r) {
   617  			t.Errorf("IsLower(0x%x - 1<<31) = true, want false", base)
   618  		}
   619  		if IsMark(r) {
   620  			t.Errorf("IsMark(0x%x - 1<<31) = true, want false", base)
   621  		}
   622  		if IsNumber(r) {
   623  			t.Errorf("IsNumber(0x%x - 1<<31) = true, want false", base)
   624  		}
   625  		if IsPrint(r) {
   626  			t.Errorf("IsPrint(0x%x - 1<<31) = true, want false", base)
   627  		}
   628  		if IsPunct(r) {
   629  			t.Errorf("IsPunct(0x%x - 1<<31) = true, want false", base)
   630  		}
   631  		if IsSpace(r) {
   632  			t.Errorf("IsSpace(0x%x - 1<<31) = true, want false", base)
   633  		}
   634  		if IsSymbol(r) {
   635  			t.Errorf("IsSymbol(0x%x - 1<<31) = true, want false", base)
   636  		}
   637  		if IsTitle(r) {
   638  			t.Errorf("IsTitle(0x%x - 1<<31) = true, want false", base)
   639  		}
   640  		if IsUpper(r) {
   641  			t.Errorf("IsUpper(0x%x - 1<<31) = true, want false", base)
   642  		}
   643  	}
   644  }
   645  
   646  func BenchmarkToUpper(b *testing.B) {
   647  	for i := 0; i < b.N; i++ {
   648  		_ = ToUpper('δ')
   649  	}
   650  }
   651  
   652  func BenchmarkToLower(b *testing.B) {
   653  	for i := 0; i < b.N; i++ {
   654  		_ = ToLower('Δ')
   655  	}
   656  }
   657  
   658  func BenchmarkSimpleFold(b *testing.B) {
   659  	bench := func(name string, r rune) {
   660  		b.Run(name, func(b *testing.B) {
   661  			for i := 0; i < b.N; i++ {
   662  				_ = SimpleFold(r)
   663  			}
   664  		})
   665  	}
   666  	bench("Upper", 'Δ')
   667  	bench("Lower", 'δ')
   668  	bench("Fold", '\u212A')
   669  	bench("NoFold", '習')
   670  }
   671  
View as plain text