@ -30,15 +30,15 @@ func resetDefaultCharsetsOrder() {
}
}
// BOM-stripping test, shown here with both spellings of each call side by
// side: the one-argument RemoveBOMIfPresent form and the form that also
// receives a ConvertOpts { } value. NOTE(review): this looks like the before
// and after lines of a rename - confirm which spelling the package exports.
//
// Two inputs are checked and both must yield the same ten bytes
// (the UTF-8 encoding of "áéíóú"):
//   - the ten bytes on their own, which must come back unchanged;
//   - the same bytes preceded by 0xef 0xbb 0xbf (UTF-8 BOM), which must come
//     back with that three-byte prefix removed.
func TestRemoveBOMIfPresent ( t * testing . T ) {
res := RemoveBOMIfPresent ( [ ] byte { 0xc3 , 0xa1 , 0xc3 , 0xa9 , 0xc3 , 0xad , 0xc3 , 0xb3 , 0xc3 , 0xba } )
func TestMaybe RemoveBOM ( t * testing . T ) {
res := Maybe RemoveBOM( [ ] byte { 0xc3 , 0xa1 , 0xc3 , 0xa9 , 0xc3 , 0xad , 0xc3 , 0xb3 , 0xc3 , 0xba } , ConvertOpts { } )
// No BOM in the input: output equals input.
assert . Equal ( t , [ ] byte { 0xc3 , 0xa1 , 0xc3 , 0xa9 , 0xc3 , 0xad , 0xc3 , 0xb3 , 0xc3 , 0xba } , res )
res = RemoveBOMIfPresent ( [ ] byte { 0xef , 0xbb , 0xbf , 0xc3 , 0xa1 , 0xc3 , 0xa9 , 0xc3 , 0xad , 0xc3 , 0xb3 , 0xc3 , 0xba } )
res = Maybe RemoveBOM( [ ] byte { 0xef , 0xbb , 0xbf , 0xc3 , 0xa1 , 0xc3 , 0xa9 , 0xc3 , 0xad , 0xc3 , 0xb3 , 0xc3 , 0xba } , ConvertOpts { } )
// Leading 0xef 0xbb 0xbf is expected to be dropped; the payload is untouched.
assert . Equal ( t , [ ] byte { 0xc3 , 0xa1 , 0xc3 , 0xa9 , 0xc3 , 0xad , 0xc3 , 0xb3 , 0xc3 , 0xba } , res )
}
func TestToUTF8WithErr ( t * testing . T ) {
func TestToUTF8 ( t * testing . T ) {
resetDefaultCharsetsOrder ( )
var res string
var err error
@ -47,53 +47,53 @@ func TestToUTF8WithErr(t *testing.T) {
// locale, so some conversions might behave differently. For that reason, we don't
// depend on particular conversions but on expected behaviors.
res , err = ToUTF8WithErr ( [ ] byte { 0x41 , 0x42 , 0x43 } )
res , err = ToUTF8 ( [ ] byte { 0x41 , 0x42 , 0x43 } , ConvertOpts { } )
assert . NoError ( t , err )
assert . Equal ( t , "ABC" , res )
// "áéíóú"
res , err = ToUTF8WithErr ( [ ] byte { 0xc3 , 0xa1 , 0xc3 , 0xa9 , 0xc3 , 0xad , 0xc3 , 0xb3 , 0xc3 , 0xba } )
res , err = ToUTF8 ( [ ] byte { 0xc3 , 0xa1 , 0xc3 , 0xa9 , 0xc3 , 0xad , 0xc3 , 0xb3 , 0xc3 , 0xba } , ConvertOpts { } )
assert . NoError ( t , err )
assert . Equal ( t , [ ] byte { 0xc3 , 0xa1 , 0xc3 , 0xa9 , 0xc3 , 0xad , 0xc3 , 0xb3 , 0xc3 , 0xba } , [ ] byte ( res ) )
// UTF8 BOM + "áéíóú"
res , err = ToUTF8WithErr ( [ ] byte {
res , err = ToUTF8 ( [ ] byte {
0xef , 0xbb , 0xbf , 0xc3 , 0xa1 , 0xc3 , 0xa9 , 0xc3 , 0xad , 0xc3 , 0xb3 ,
0xc3 , 0xba ,
} )
} , ConvertOpts { } )
assert . NoError ( t , err )
assert . Equal ( t , [ ] byte { 0xc3 , 0xa1 , 0xc3 , 0xa9 , 0xc3 , 0xad , 0xc3 , 0xb3 , 0xc3 , 0xba } , [ ] byte ( res ) )
res , err = ToUTF8WithErr ( [ ] byte {
res , err = ToUTF8 ( [ ] byte {
0x48 , 0x6F , 0x6C , 0x61 , 0x2C , 0x20 , 0x61 , 0x73 , 0xED , 0x20 , 0x63 ,
0xF3 , 0x6D , 0x6F , 0x20 , 0xF1 , 0x6F , 0x73 , 0x41 , 0x41 , 0x41 , 0x2e ,
} )
} , ConvertOpts { } )
assert . NoError ( t , err )
stringMustStartWith ( t , "Hola," , res )
stringMustEndWith ( t , "AAA." , res )
res , err = ToUTF8WithErr ( [ ] byte {
res , err = ToUTF8 ( [ ] byte {
0x48 , 0x6F , 0x6C , 0x61 , 0x2C , 0x20 , 0x61 , 0x73 , 0xED , 0x20 , 0x63 ,
0xF3 , 0x6D , 0x6F , 0x20 , 0x07 , 0xA4 , 0x6F , 0x73 , 0x41 , 0x41 , 0x41 , 0x2e ,
} )
} , ConvertOpts { } )
assert . NoError ( t , err )
stringMustStartWith ( t , "Hola," , res )
stringMustEndWith ( t , "AAA." , res )
res , err = ToUTF8WithErr ( [ ] byte {
res , err = ToUTF8 ( [ ] byte {
0x48 , 0x6F , 0x6C , 0x61 , 0x2C , 0x20 , 0x61 , 0x73 , 0xED , 0x20 , 0x63 ,
0xF3 , 0x6D , 0x6F , 0x20 , 0x81 , 0xA4 , 0x6F , 0x73 , 0x41 , 0x41 , 0x41 , 0x2e ,
} )
} , ConvertOpts { } )
assert . NoError ( t , err )
stringMustStartWith ( t , "Hola," , res )
stringMustEndWith ( t , "AAA." , res )
// Japanese (Shift-JIS)
// 日属秘ぞしちゅ。
res , err = ToUTF8WithErr ( [ ] byte {
res , err = ToUTF8 ( [ ] byte {
0x93 , 0xFA , 0x91 , 0xAE , 0x94 , 0xE9 , 0x82 , 0xBC , 0x82 , 0xB5 , 0x82 ,
0xBF , 0x82 , 0xE3 , 0x81 , 0x42 ,
} )
} , ConvertOpts { } )
assert . NoError ( t , err )
assert . Equal ( t , [ ] byte {
0xE6 , 0x97 , 0xA5 , 0xE5 , 0xB1 , 0x9E , 0xE7 , 0xA7 , 0x98 , 0xE3 ,
@ -101,7 +101,7 @@ func TestToUTF8WithErr(t *testing.T) {
} ,
[ ] byte ( res ) )
res , err = ToUTF8WithErr ( [ ] byte { 0x00 , 0x00 , 0x00 , 0x00 } )
res , err = ToUTF8 ( [ ] byte { 0x00 , 0x00 , 0x00 , 0x00 } , ConvertOpts { } )
assert . NoError ( t , err )
assert . Equal ( t , [ ] byte { 0x00 , 0x00 , 0x00 , 0x00 } , [ ] byte ( res ) )
}
@ -109,22 +109,22 @@ func TestToUTF8WithErr(t *testing.T) {
func TestToUTF8WithFallback ( t * testing . T ) {
resetDefaultCharsetsOrder ( )
// "ABC"
res := ToUTF8WithFallback ( [ ] byte { 0x41 , 0x42 , 0x43 } )
res := ToUTF8WithFallback ( [ ] byte { 0x41 , 0x42 , 0x43 } , ConvertOpts { } )
assert . Equal ( t , [ ] byte { 0x41 , 0x42 , 0x43 } , res )
// "áéíóú"
res = ToUTF8WithFallback ( [ ] byte { 0xc3 , 0xa1 , 0xc3 , 0xa9 , 0xc3 , 0xad , 0xc3 , 0xb3 , 0xc3 , 0xba } )
res = ToUTF8WithFallback ( [ ] byte { 0xc3 , 0xa1 , 0xc3 , 0xa9 , 0xc3 , 0xad , 0xc3 , 0xb3 , 0xc3 , 0xba } , ConvertOpts { } )
assert . Equal ( t , [ ] byte { 0xc3 , 0xa1 , 0xc3 , 0xa9 , 0xc3 , 0xad , 0xc3 , 0xb3 , 0xc3 , 0xba } , res )
// UTF8 BOM + "áéíóú"
res = ToUTF8WithFallback ( [ ] byte { 0xef , 0xbb , 0xbf , 0xc3 , 0xa1 , 0xc3 , 0xa9 , 0xc3 , 0xad , 0xc3 , 0xb3 , 0xc3 , 0xba } )
res = ToUTF8WithFallback ( [ ] byte { 0xef , 0xbb , 0xbf , 0xc3 , 0xa1 , 0xc3 , 0xa9 , 0xc3 , 0xad , 0xc3 , 0xb3 , 0xc3 , 0xba } , ConvertOpts { } )
assert . Equal ( t , [ ] byte { 0xc3 , 0xa1 , 0xc3 , 0xa9 , 0xc3 , 0xad , 0xc3 , 0xb3 , 0xc3 , 0xba } , res )
// "Hola, así cómo ños"
res = ToUTF8WithFallback ( [ ] byte {
0x48 , 0x6F , 0x6C , 0x61 , 0x2C , 0x20 , 0x61 , 0x73 , 0xED , 0x20 , 0x63 ,
0xF3 , 0x6D , 0x6F , 0x20 , 0xF1 , 0x6F , 0x73 ,
} )
} , ConvertOpts { } )
assert . Equal ( t , [ ] byte {
0x48 , 0x6F , 0x6C , 0x61 , 0x2C , 0x20 , 0x61 , 0x73 , 0xC3 , 0xAD , 0x20 , 0x63 ,
0xC3 , 0xB3 , 0x6D , 0x6F , 0x20 , 0xC3 , 0xB1 , 0x6F , 0x73 ,
@ -133,126 +133,65 @@ func TestToUTF8WithFallback(t *testing.T) {
// "Hola, así cómo "
minmatch := [ ] byte { 0x48 , 0x6F , 0x6C , 0x61 , 0x2C , 0x20 , 0x61 , 0x73 , 0xC3 , 0xAD , 0x20 , 0x63 , 0xC3 , 0xB3 , 0x6D , 0x6F , 0x20 }
res = ToUTF8WithFallback ( [ ] byte { 0x48 , 0x6F , 0x6C , 0x61 , 0x2C , 0x20 , 0x61 , 0x73 , 0xED , 0x20 , 0x63 , 0xF3 , 0x6D , 0x6F , 0x20 , 0x07 , 0xA4 , 0x6F , 0x73 } )
res = ToUTF8WithFallback ( [ ] byte { 0x48 , 0x6F , 0x6C , 0x61 , 0x2C , 0x20 , 0x61 , 0x73 , 0xED , 0x20 , 0x63 , 0xF3 , 0x6D , 0x6F , 0x20 , 0x07 , 0xA4 , 0x6F , 0x73 } , ConvertOpts { } )
// Do not fail for differences in invalid cases, as the library might change the conversion criteria for those
assert . Equal ( t , minmatch , res [ 0 : len ( minmatch ) ] )
res = ToUTF8WithFallback ( [ ] byte { 0x48 , 0x6F , 0x6C , 0x61 , 0x2C , 0x20 , 0x61 , 0x73 , 0xED , 0x20 , 0x63 , 0xF3 , 0x6D , 0x6F , 0x20 , 0x81 , 0xA4 , 0x6F , 0x73 } )
res = ToUTF8WithFallback ( [ ] byte { 0x48 , 0x6F , 0x6C , 0x61 , 0x2C , 0x20 , 0x61 , 0x73 , 0xED , 0x20 , 0x63 , 0xF3 , 0x6D , 0x6F , 0x20 , 0x81 , 0xA4 , 0x6F , 0x73 } , ConvertOpts { } )
// Do not fail for differences in invalid cases, as the library might change the conversion criteria for those
assert . Equal ( t , minmatch , res [ 0 : len ( minmatch ) ] )
// Japanese (Shift-JIS)
// "日属秘ぞしちゅ。"
res = ToUTF8WithFallback ( [ ] byte { 0x93 , 0xFA , 0x91 , 0xAE , 0x94 , 0xE9 , 0x82 , 0xBC , 0x82 , 0xB5 , 0x82 , 0xBF , 0x82 , 0xE3 , 0x81 , 0x42 } )
res = ToUTF8WithFallback ( [ ] byte { 0x93 , 0xFA , 0x91 , 0xAE , 0x94 , 0xE9 , 0x82 , 0xBC , 0x82 , 0xB5 , 0x82 , 0xBF , 0x82 , 0xE3 , 0x81 , 0x42 } , ConvertOpts { } )
assert . Equal ( t , [ ] byte {
0xE6 , 0x97 , 0xA5 , 0xE5 , 0xB1 , 0x9E , 0xE7 , 0xA7 , 0x98 , 0xE3 ,
0x81 , 0x9E , 0xE3 , 0x81 , 0x97 , 0xE3 , 0x81 , 0xA1 , 0xE3 , 0x82 , 0x85 , 0xE3 , 0x80 , 0x82 ,
} , res )
res = ToUTF8WithFallback ( [ ] byte { 0x00 , 0x00 , 0x00 , 0x00 } )
res = ToUTF8WithFallback ( [ ] byte { 0x00 , 0x00 , 0x00 , 0x00 } , ConvertOpts { } )
assert . Equal ( t , [ ] byte { 0x00 , 0x00 , 0x00 , 0x00 } , res )
}
// TestToUTF8 exercises the ToUTF8 form that takes a single string argument
// and returns a string (no error value is read from it here).
//
// The default charset order is reset first, then the same `res` variable is
// reused for a sequence of independent cases: plain ASCII, already-valid
// UTF-8, UTF-8 with a leading BOM, Latin1 input, Latin1 input containing
// bytes that may not convert cleanly, Shift-JIS input, and four NUL bytes.
func TestToUTF8 ( t * testing . T ) {
resetDefaultCharsetsOrder ( )
// Note: golang compiler seems to behave differently depending on the current
// locale, so some conversions might behave differently. For that reason, we don't
// depend on particular conversions but on expected behaviors.
res := ToUTF8 ( string ( [ ] byte { 0x41 , 0x42 , 0x43 } ) )
assert . Equal ( t , "ABC" , res )
// "áéíóú" - already UTF-8, expected to pass through unchanged.
res = ToUTF8 ( string ( [ ] byte { 0xc3 , 0xa1 , 0xc3 , 0xa9 , 0xc3 , 0xad , 0xc3 , 0xb3 , 0xc3 , 0xba } ) )
assert . Equal ( t , [ ] byte { 0xc3 , 0xa1 , 0xc3 , 0xa9 , 0xc3 , 0xad , 0xc3 , 0xb3 , 0xc3 , 0xba } , [ ] byte ( res ) )
// BOM + "áéíóú" - the leading 0xef 0xbb 0xbf must not appear in the result.
res = ToUTF8 ( string ( [ ] byte {
0xef , 0xbb , 0xbf , 0xc3 , 0xa1 , 0xc3 , 0xa9 , 0xc3 , 0xad , 0xc3 , 0xb3 ,
0xc3 , 0xba ,
} ) )
assert . Equal ( t , [ ] byte { 0xc3 , 0xa1 , 0xc3 , 0xa9 , 0xc3 , 0xad , 0xc3 , 0xb3 , 0xc3 , 0xba } , [ ] byte ( res ) )
// Latin1
// Hola, así cómo ños
// Each single-byte accented character (0xED, 0xF3, 0xF1) is expected to
// come back as its two-byte UTF-8 sequence.
res = ToUTF8 ( string ( [ ] byte {
0x48 , 0x6F , 0x6C , 0x61 , 0x2C , 0x20 , 0x61 , 0x73 , 0xED , 0x20 , 0x63 ,
0xF3 , 0x6D , 0x6F , 0x20 , 0xF1 , 0x6F , 0x73 ,
} ) )
assert . Equal ( t , [ ] byte {
0x48 , 0x6f , 0x6c , 0x61 , 0x2c , 0x20 , 0x61 , 0x73 , 0xc3 , 0xad , 0x20 , 0x63 ,
0xc3 , 0xb3 , 0x6d , 0x6f , 0x20 , 0xc3 , 0xb1 , 0x6f , 0x73 ,
} , [ ] byte ( res ) )
// Latin1
// Hola, así cómo \x07ños
// Only the ASCII prefix is pinned for this input; the rest of the output is
// deliberately left unchecked.
res = ToUTF8 ( string ( [ ] byte {
0x48 , 0x6F , 0x6C , 0x61 , 0x2C , 0x20 , 0x61 , 0x73 , 0xED , 0x20 , 0x63 ,
0xF3 , 0x6D , 0x6F , 0x20 , 0x07 , 0xA4 , 0x6F , 0x73 ,
} ) )
// Hola,
bytesMustStartWith ( t , [ ] byte { 0x48 , 0x6F , 0x6C , 0x61 , 0x2C } , [ ] byte ( res ) )
// This test FAILS (kept commented out for reference)
// res = ToUTF8("Hola, así cómo \x81ños")
// Do not fail for differences in invalid cases, as the library might change the conversion criteria for those
// assert.Regexp(t, "^Hola, así cómo", res)
// Japanese (Shift-JIS)
// 日属秘ぞしちゅ。
res = ToUTF8 ( string ( [ ] byte {
0x93 , 0xFA , 0x91 , 0xAE , 0x94 , 0xE9 , 0x82 , 0xBC , 0x82 , 0xB5 , 0x82 ,
0xBF , 0x82 , 0xE3 , 0x81 , 0x42 ,
} ) )
assert . Equal ( t , [ ] byte {
0xE6 , 0x97 , 0xA5 , 0xE5 , 0xB1 , 0x9E , 0xE7 , 0xA7 , 0x98 , 0xE3 ,
0x81 , 0x9E , 0xE3 , 0x81 , 0x97 , 0xE3 , 0x81 , 0xA1 , 0xE3 , 0x82 , 0x85 , 0xE3 , 0x80 , 0x82 ,
} ,
[ ] byte ( res ) )
// Four NUL bytes are expected to come back unchanged.
res = ToUTF8 ( "\x00\x00\x00\x00" )
assert . Equal ( t , [ ] byte { 0x00 , 0x00 , 0x00 , 0x00 } , [ ] byte ( res ) )
}
// TestToUTF8DropErrors checks the byte-slice results of ToUTF8DropErrors for
// ASCII, valid UTF-8, BOM-prefixed UTF-8, Latin1, partially invalid input,
// Shift-JIS and NUL bytes.
//
// Every call is listed twice: once with the byte slice as the only argument
// and once with an extra ConvertOpts { } argument. NOTE(review): these look
// like the before/after lines of a signature change - confirm which form the
// package currently exposes. Each assertion applies to the `res` assigned by
// the call immediately above it.
func TestToUTF8DropErrors ( t * testing . T ) {
resetDefaultCharsetsOrder ( )
// "ABC"
res := ToUTF8DropErrors ( [ ] byte { 0x41 , 0x42 , 0x43 } )
res := ToUTF8DropErrors ( [ ] byte { 0x41 , 0x42 , 0x43 } , ConvertOpts { } )
assert . Equal ( t , [ ] byte { 0x41 , 0x42 , 0x43 } , res )
// "áéíóú"
res = ToUTF8DropErrors ( [ ] byte { 0xc3 , 0xa1 , 0xc3 , 0xa9 , 0xc3 , 0xad , 0xc3 , 0xb3 , 0xc3 , 0xba } )
res = ToUTF8DropErrors ( [ ] byte { 0xc3 , 0xa1 , 0xc3 , 0xa9 , 0xc3 , 0xad , 0xc3 , 0xb3 , 0xc3 , 0xba } , ConvertOpts { } )
assert . Equal ( t , [ ] byte { 0xc3 , 0xa1 , 0xc3 , 0xa9 , 0xc3 , 0xad , 0xc3 , 0xb3 , 0xc3 , 0xba } , res )
// UTF8 BOM + "áéíóú"
res = ToUTF8DropErrors ( [ ] byte { 0xef , 0xbb , 0xbf , 0xc3 , 0xa1 , 0xc3 , 0xa9 , 0xc3 , 0xad , 0xc3 , 0xb3 , 0xc3 , 0xba } )
res = ToUTF8DropErrors ( [ ] byte { 0xef , 0xbb , 0xbf , 0xc3 , 0xa1 , 0xc3 , 0xa9 , 0xc3 , 0xad , 0xc3 , 0xb3 , 0xc3 , 0xba } , ConvertOpts { } )
assert . Equal ( t , [ ] byte { 0xc3 , 0xa1 , 0xc3 , 0xa9 , 0xc3 , 0xad , 0xc3 , 0xb3 , 0xc3 , 0xba } , res )
// "Hola, así cómo ños"
res = ToUTF8DropErrors ( [ ] byte { 0x48 , 0x6F , 0x6C , 0x61 , 0x2C , 0x20 , 0x61 , 0x73 , 0xED , 0x20 , 0x63 , 0xF3 , 0x6D , 0x6F , 0x20 , 0xF1 , 0x6F , 0x73 } )
res = ToUTF8DropErrors ( [ ] byte { 0x48 , 0x6F , 0x6C , 0x61 , 0x2C , 0x20 , 0x61 , 0x73 , 0xED , 0x20 , 0x63 , 0xF3 , 0x6D , 0x6F , 0x20 , 0xF1 , 0x6F , 0x73 } , ConvertOpts { } )
// Only the first eight bytes ("Hola, as") and the final byte ('s') are
// pinned; whatever the function produces in between is not asserted.
assert . Equal ( t , [ ] byte { 0x48 , 0x6F , 0x6C , 0x61 , 0x2C , 0x20 , 0x61 , 0x73 } , res [ : 8 ] )
assert . Equal ( t , [ ] byte { 0x73 } , res [ len ( res ) - 1 : ] )
// "Hola, así cómo "
// minmatch is the UTF-8 prefix that the next two results must start with.
minmatch := [ ] byte { 0x48 , 0x6F , 0x6C , 0x61 , 0x2C , 0x20 , 0x61 , 0x73 , 0xC3 , 0xAD , 0x20 , 0x63 , 0xC3 , 0xB3 , 0x6D , 0x6F , 0x20 }
res = ToUTF8DropErrors ( [ ] byte { 0x48 , 0x6F , 0x6C , 0x61 , 0x2C , 0x20 , 0x61 , 0x73 , 0xED , 0x20 , 0x63 , 0xF3 , 0x6D , 0x6F , 0x20 , 0x07 , 0xA4 , 0x6F , 0x73 } )
res = ToUTF8DropErrors ( [ ] byte { 0x48 , 0x6F , 0x6C , 0x61 , 0x2C , 0x20 , 0x61 , 0x73 , 0xED , 0x20 , 0x63 , 0xF3 , 0x6D , 0x6F , 0x20 , 0x07 , 0xA4 , 0x6F , 0x73 } , ConvertOpts { } )
// Do not fail for differences in invalid cases, as the library might change the conversion criteria for those
assert . Equal ( t , minmatch , res [ 0 : len ( minmatch ) ] )
res = ToUTF8DropErrors ( [ ] byte { 0x48 , 0x6F , 0x6C , 0x61 , 0x2C , 0x20 , 0x61 , 0x73 , 0xED , 0x20 , 0x63 , 0xF3 , 0x6D , 0x6F , 0x20 , 0x81 , 0xA4 , 0x6F , 0x73 } )
res = ToUTF8DropErrors ( [ ] byte { 0x48 , 0x6F , 0x6C , 0x61 , 0x2C , 0x20 , 0x61 , 0x73 , 0xED , 0x20 , 0x63 , 0xF3 , 0x6D , 0x6F , 0x20 , 0x81 , 0xA4 , 0x6F , 0x73 } , ConvertOpts { } )
// Do not fail for differences in invalid cases, as the library might change the conversion criteria for those
assert . Equal ( t , minmatch , res [ 0 : len ( minmatch ) ] )
// Japanese (Shift-JIS)
// "日属秘ぞしちゅ。"
res = ToUTF8DropErrors ( [ ] byte { 0x93 , 0xFA , 0x91 , 0xAE , 0x94 , 0xE9 , 0x82 , 0xBC , 0x82 , 0xB5 , 0x82 , 0xBF , 0x82 , 0xE3 , 0x81 , 0x42 } )
res = ToUTF8DropErrors ( [ ] byte { 0x93 , 0xFA , 0x91 , 0xAE , 0x94 , 0xE9 , 0x82 , 0xBC , 0x82 , 0xB5 , 0x82 , 0xBF , 0x82 , 0xE3 , 0x81 , 0x42 } , ConvertOpts { } )
assert . Equal ( t , [ ] byte {
0xE6 , 0x97 , 0xA5 , 0xE5 , 0xB1 , 0x9E , 0xE7 , 0xA7 , 0x98 , 0xE3 ,
0x81 , 0x9E , 0xE3 , 0x81 , 0x97 , 0xE3 , 0x81 , 0xA1 , 0xE3 , 0x82 , 0x85 , 0xE3 , 0x80 , 0x82 ,
} , res )
// Four NUL bytes are expected to come back unchanged.
res = ToUTF8DropErrors ( [ ] byte { 0x00 , 0x00 , 0x00 , 0x00 } )
res = ToUTF8DropErrors ( [ ] byte { 0x00 , 0x00 , 0x00 , 0x00 } , ConvertOpts { } )
assert . Equal ( t , [ ] byte { 0x00 , 0x00 , 0x00 , 0x00 } , res )
}
@ -302,10 +241,6 @@ func stringMustEndWith(t *testing.T, expected, value string) {
assert . Equal ( t , expected , value [ len ( value ) - len ( expected ) : ] )
}
// bytesMustStartWith asserts that value begins with the bytes in expected.
//
// The leading segment is obtained by slicing value to len(expected), so value
// must be at least as long as expected.
func bytesMustStartWith(t *testing.T, expected, value []byte) {
	prefix := value[:len(expected)]
	assert.Equal(t, expected, prefix)
}
func TestToUTF8WithFallbackReader ( t * testing . T ) {
resetDefaultCharsetsOrder ( )
@ -317,7 +252,7 @@ func TestToUTF8WithFallbackReader(t *testing.T) {
}
input = input [ : testLen ]
input += "// Выключаем"
rd := ToUTF8WithFallbackReader ( bytes . NewReader ( [ ] byte ( input ) ) )
rd := ToUTF8WithFallbackReader ( bytes . NewReader ( [ ] byte ( input ) ) , ConvertOpts { } )
r , _ := io . ReadAll ( rd )
assert . EqualValuesf ( t , input , string ( r ) , "testing string len=%d" , testLen )
}