diff --git a/README.md b/README.md
index cb5d4194..bf3ff83c 100644
--- a/README.md
+++ b/README.md
@@ -166,6 +166,7 @@ validate := validator.New(validator.WithRequiredStructEnabled())
 | base64rawurl | Base64RawURL String |
 | bic | Business Identifier Code (ISO 9362) |
 | bcp47_language_tag | Language tag (BCP 47) |
+| bcp47_strict_language_tag | Language tag (BCP 47), strictly following RFC 5646 |
 | btc_addr | Bitcoin Address |
 | btc_addr_bech32 | Bitcoin Bech32 Address (segwit) |
 | credit_card | Credit Card Number |
diff --git a/baked_in.go b/baked_in.go
index 8fd55e77..e0358c1f 100644
--- a/baked_in.go
+++ b/baked_in.go
@@ -15,6 +15,7 @@ import (
 	"net/url"
 	"os"
 	"reflect"
+	"regexp"
 	"strconv"
 	"strings"
 	"sync"
@@ -235,6 +236,7 @@ var (
 		"iso4217":                       isIso4217,
 		"iso4217_numeric":               isIso4217Numeric,
 		"bcp47_language_tag":            isBCP47LanguageTag,
+		"bcp47_strict_language_tag":     isBCP47StrictLanguageTag,
 		"postcode_iso3166_alpha2":       isPostcodeByIso3166Alpha2,
 		"postcode_iso3166_alpha2_field": isPostcodeByIso3166Alpha2Field,
 		"bic":                           isIsoBicFormat,
@@ -2943,6 +2945,205 @@ func isBCP47LanguageTag(fl FieldLevel) bool {
 	panic(fmt.Sprintf("Bad field type %s", field.Type()))
 }
 
+// bcp47StrictLanguageTagRegex matches an upper-cased, well-formed BCP 47 language tag
+// (RFC 5646, section 2.1). It is compiled once at package initialization instead of on
+// every validation call.
+//
+// Capture groups: 1 = grandfathered or private-use-only tag, 2 = language (with optional
+// extlang subtags), 3 = script, 4 = region, 5 = variants, 6 = extensions.
+var bcp47StrictLanguageTagRegex = regexp.MustCompile(strings.Join([]string{
+	// group 1:
+	`^(`,
+	// irregular
+	`EN-GB-OED|I-AMI|I-BNN|I-DEFAULT|I-ENOCHIAN|I-HAK|I-KLINGON|I-LUX|I-MINGO|I-NAVAJO|I-PWN|I-TAO|I-TAY|I-TSU|`,
+	`SGN-BE-FR|SGN-BE-NL|SGN-CH-DE|`,
+	// regular
+	`ART-LOJBAN|CEL-GAULISH|NO-BOK|NO-NYN|ZH-GUOYU|ZH-HAKKA|ZH-MIN|ZH-MIN-NAN|ZH-XIANG|`,
+	// privateuse = "x" 1*("-" (1*8alphanum)): one or more subtags after the singleton
+	`X(?:-[A-Z0-9]{1,8})+`,
+	`)$`,
+
+	`|`,
+
+	// langtag
+	`^`,
+	`((?:[A-Z]{2,3}(?:-[A-Z]{3}){0,3})|[A-Z]{4}|[A-Z]{5,8})`, // group 2: language
+	`(?:-([A-Z]{4}))?`, // group 3: script
+	`(?:-([A-Z]{2}|[0-9]{3}))?`, // group 4: region
+	`(?:-((?:[A-Z0-9]{5,8}|[0-9][A-Z0-9]{3})(?:-(?:[A-Z0-9]{5,8}|[0-9][A-Z0-9]{3}))*))?`, // group 5: variant
+	`(?:-((?:[A-WYZ0-9](?:-[A-Z0-9]{2,8})+)(?:-(?:[A-WYZ0-9](?:-[A-Z0-9]{2,8})+))*))?`, // group 6: extension
+	`(?:-X(?:-[A-Z0-9]{1,8})+)?`, // trailing private use subtags (not captured)
+	`$`,
+}, ""))
+
+// isShortestISO639Code reports whether code, an upper-cased 2- or 3-letter subtag, is
+// accepted by language.ParseBase and is spelled with the shortest code for that
+// language.
+func isShortestISO639Code(code string) bool {
+	base, err := language.ParseBase(code)
+	if err != nil {
+		return false
+	}
+	// base.String() normalizes the base to the shortest code for the language.
+	return strings.ToUpper(base.String()) == code
+}
+
+// hasRegisteredPrefix reports whether tagWithDash (the lower-cased tag followed by "-")
+// starts with one of the registry prefixes. An empty prefix list places no restriction.
+// The prefix is compared with a trailing "-" so that it only matches whole subtags:
+// "de" is a prefix of "de-1901" but not of "den-1901".
+func hasRegisteredPrefix(tagWithDash string, prefixes []string) bool {
+	if len(prefixes) == 0 {
+		return true
+	}
+	for _, p := range prefixes {
+		if strings.HasPrefix(tagWithDash, strings.ToLower(p)+"-") {
+			return true
+		}
+	}
+	return false
+}
+
+// isBCP47StrictLanguageTag is the validation function for validating if the current field's value is a valid BCP 47 language tag
+// according to https://www.rfc-editor.org/rfc/bcp/bcp47.txt
+//
+// Matching is case-insensitive. Grandfathered and private-use-only tags are accepted as is;
+// for any other tag the language, script, region, variant and extension subtags are each
+// checked. It panics if the field is not a string.
+func isBCP47StrictLanguageTag(fl FieldLevel) bool {
+	field := fl.Field()
+
+	if field.Kind() != reflect.String {
+		panic(fmt.Sprintf("Bad field type %s", field.Type()))
+	}
+
+	raw := field.String()
+
+	// Language tags are pure ASCII. Reject anything else before upper-casing, because
+	// strings.ToUpper folds some non-ASCII letters into ASCII ones (e.g. "ı" into "I")
+	// and they would then slip through the regular expression.
+	for i := 0; i < len(raw); i++ {
+		if raw[i] >= 0x80 {
+			return false
+		}
+	}
+
+	languageTag := strings.ToUpper(raw)
+
+	m := bcp47StrictLanguageTagRegex.FindStringSubmatch(languageTag)
+	if m == nil {
+		return false
+	}
+
+	grandfatheredOrPrivateuse := m[1]
+	lang := m[2]
+	script := m[3]
+	region := m[4]
+	variant := m[5]
+	extension := m[6]
+
+	if grandfatheredOrPrivateuse != "" {
+		return true
+	}
+
+	// Lower-cased once, with a trailing "-", for the registry prefix checks below.
+	tagWithDash := strings.ToLower(languageTag) + "-"
+
+	// language      = 2*3ALPHA            ; shortest ISO 639 code
+	//                 ["-" extlang]       ; sometimes followed by
+	//                                     ; extended language subtags
+	//               / 4ALPHA              ; or reserved for future use
+	//               / 5*8ALPHA            ; or registered language subtag
+	switch n := len(lang); {
+	// 2*3ALPHA "-" extlang
+	case strings.Contains(lang, "-"):
+		parts := strings.Split(lang, "-")
+
+		// The ABNF allows up to three extlang subtags, but RFC 5646 (section 2.2.2)
+		// permanently reserves the second and third positions, so tags using them
+		// are always invalid.
+		if len(parts) > 2 {
+			return false
+		}
+
+		if !isShortestISO639Code(parts[0]) {
+			return false
+		}
+
+		prefixes, ok := iana_subtag_registry_extlangs[strings.ToLower(parts[1])]
+		if !ok || !hasRegisteredPrefix(tagWithDash, prefixes) {
+			return false
+		}
+	// 2*3ALPHA ; shortest ISO 639 code
+	case n <= 3:
+		if !isShortestISO639Code(lang) {
+			return false
+		}
+	// 4ALPHA ; or reserved for future use
+	case n == 4:
+		return false
+	// 5*8ALPHA ; or registered language subtag
+	default:
+		// registered language subtag with 5+ characters.
+		// As of today there aren't any.
+		// https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
+		return false
+	}
+
+	// script = 4ALPHA ; ISO 15924 code
+	if script != "" {
+		if _, err := language.ParseScript(script); err != nil {
+			return false
+		}
+	}
+
+	// region = 2ALPHA ; ISO 3166-1 code
+	//        / 3DIGIT ; UN M.49 code
+	if region != "" {
+		if len(region) == 2 {
+			if _, err := language.ParseRegion(region); err != nil {
+				return false
+			}
+		} else if _, ok := iana_subtag_registry_m49_codes[region]; !ok {
+			// Can't use language.ParseRegion() here because not all
+			// UN M.49 region codes are allowed, just the subset present
+			// in the IANA subtag registry.
+			return false
+		}
+	}
+
+	// variant = 5*8alphanum ; registered variants
+	//         / (DIGIT 3alphanum)
+	if variant != "" {
+		// RFC 5646 (section 2.2.5): the same variant subtag must not be used more
+		// than once within a language tag.
+		seen := make(map[string]struct{})
+		for v := range strings.SplitSeq(strings.ToLower(variant), "-") {
+			if _, dup := seen[v]; dup {
+				return false
+			}
+			seen[v] = struct{}{}
+
+			if _, err := language.ParseVariant(v); err != nil {
+				return false
+			}
+
+			prefixes, ok := iana_subtag_registry_variants[v]
+			if !ok || !hasRegisteredPrefix(tagWithDash, prefixes) {
+				return false
+			}
+		}
+	}
+
+	if extension != "" {
+		if _, err := language.ParseExtension(extension); err != nil {
+			return false
+		}
+	}
+
+	return true
+}
+
 // isIsoBicFormat is the validation function for validating if the current field's value is a valid Business Identifier Code (SWIFT code), defined in ISO 9362
 func isIsoBicFormat(fl FieldLevel) bool {
 	bicString := fl.Field().String()
diff --git a/country_codes.go b/country_codes.go
index b5f10d3c..6f6327be 100644
--- a/country_codes.go
+++ b/country_codes.go
@@ -1175,3
+1175,15 @@ var iso3166_2 = map[string]struct{}{
 	"ZW-BU": {}, "ZW-HA": {}, "ZW-MA": {}, "ZW-MC": {}, "ZW-ME": {},
 	"ZW-MI": {}, "ZW-MN": {}, "ZW-MS": {}, "ZW-MV": {}, "ZW-MW": {},
 }
+
+// iana_subtag_registry_m49_codes is the set of 3-digit UN M.49 region codes present in the IANA Language Subtag Registry:
+// https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
+var iana_subtag_registry_m49_codes = map[string]struct{}{
+	"001": {}, "002": {}, "003": {}, "005": {}, "009": {},
+	"011": {}, "013": {}, "014": {}, "015": {}, "017": {},
+	"018": {}, "019": {}, "021": {}, "029": {}, "030": {},
+	"034": {}, "035": {}, "039": {}, "053": {}, "054": {},
+	"057": {}, "061": {}, "142": {}, "143": {}, "145": {},
+	"150": {}, "151": {}, "154": {}, "155": {}, "202": {},
+	"419": {},
+}
diff --git a/doc.go b/doc.go
index cd6eefdc..bab2767b 100644
--- a/doc.go
+++ b/doc.go
@@ -1378,6 +1378,14 @@ More information on https://pkg.go.dev/golang.org/x/text/language
 
 	Usage: bcp47_language_tag
 
+# BCP 47 Strict Language Tag
+
+This validates that a string value is a valid BCP 47 language tag, strictly following the rules of RFC 5646,
+unlike language.Parse, which also accepts Unicode extensions.
+See https://www.rfc-editor.org/rfc/bcp/bcp47.txt
+
+	Usage: bcp47_strict_language_tag
+
 BIC (SWIFT code)
 
 This validates that a string value is a valid Business Identifier Code (SWIFT code), defined in ISO 9362.
diff --git a/language_codes.go b/language_codes.go
new file mode 100644
index 00000000..16cda6c3
--- /dev/null
+++ b/language_codes.go
@@ -0,0 +1,403 @@
+package validator
+
+// iana_subtag_registry_variants maps every "Type: variant" subtag of the IANA Language Subtag
+// Registry (lower case) to its registered prefixes; an empty list imposes no prefix requirement.
+// https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
+var iana_subtag_registry_variants = map[string][]string{
+	"1606nict": {"frm"},
+	"1694acad": {"fr"},
+	"1901":     {"de"},
+	"1959acad": {"be"},
+	"1994":     {"sl-rozaj", "sl-rozaj-biske", "sl-rozaj-njiva", "sl-rozaj-osojs", "sl-rozaj-solba"},
+	"1996":     {"de"},
+	"abl1943":  {"pt-BR"},
+	"akhmimic": {"cop"},
+	"akuapem":  {"tw"},
+	"alalc97":  {},
+	"aluku":    {"djk"},
+	"anpezo":   {"lld"},
+	"ao1990":   {"pt", "gl"},
+	"aranes":   {"oc"},
+	"arevela":  {"hy"},
+	"arevmda":  {"hy"},
+	"arkaika":  {"eo"},
+	"asante":   {"tw"},
+	"auvern":   {"oc"},
+	"baku1926": {"az", "ba", "crh", "kk", "krc", "ky", "sah", "tk", "tt", "uz"},
+	"balanka":  {"blo"},
+	"barla":    {"kea"},
+	"basiceng": {"en"},
+	"bauddha":  {"sa"},
+	"bciav":    {"zbl"},
+	"bcizbl":   {"zbl"},
+	"biscayan": {"eu"},
+	"biske":    {"sl-rozaj"},
+	"blasl":    {"ase", "sgn-ase"},
+	"bohairic": {"cop"},
+	"bohoric":  {"sl"},
+	"boont":    {"en"},
+	"bornholm": {"da"},
+	"cisaup":   {"oc"},
+	"colb1945": {"pt"},
+	"cornu":    {"en"},
+	"creiss":   {"oc"},
+	"dajnko":   {"sl"},
+	"ekavsk":   {"sr", "sr-Latn", "sr-Cyrl"},
+	"emodeng":  {"en"},
+	"fascia":   {"lld"},
+	"fayyumic": {"cop"},
+	"fodom":    {"lld"},
+	"fonipa":   {},
+	"fonkirsh": {},
+	"fonnapa":  {},
+	"fonupa":   {},
+	"fonxsamp": {},
+	"gallo":    {"fr"},
+	"gascon":   {"oc"},
+	"gherd":    {"lld"},
+	"grclass":  {"oc", "oc-aranes", "oc-auvern", "oc-cisaup", "oc-creiss", "oc-gascon", "oc-lemosin", "oc-lengadoc", "oc-nicard", "oc-provenc", "oc-vivaraup"},
+	"grital":   {"oc", "oc-cisaup", "oc-nicard", "oc-provenc"},
+	"grmistr":  {"oc", "oc-aranes", "oc-auvern", "oc-cisaup", "oc-creiss", "oc-gascon", "oc-lemosin", "oc-lengadoc", "oc-nicard", "oc-provenc", "oc-vivaraup"},
+	"hanoi":    {"vi"},
+	"hepburn":  {"ja-Latn"},
+	"heploc":   {"ja-Latn-hepburn"},
+	"hognorsk": {"nn"},
+	"hsistemo": {"eo"},
+	"huett":    {"vi"},
+	"ijekavsk": {"sr", "sr-Latn", "sr-Cyrl"},
+	"itihasa":  {"sa"},
+	"ivanchov": {"bg"},
+	"jauer":    {"rm"},
+	"jyutping": {"yue"},
+	"kkcor":    {"kw"},
+	"kleinsch": {"kl", "kl-tunumiit"},
+	"kociewie": {"pl"},
+	"kscor":    {"kw"},
+	"laukika":  {"sa"},
+	"leidentr": {"egy"},
+	"lemosin":  {"oc"},
+	"lengadoc": {"oc"},
+	"lipaw":    {"sl-rozaj"},
+	"ltg1929":  {"ltg"},
+	"ltg2007":  {"ltg"},
+	"luna1918": {"ru"},
+	"lycopol":  {"cop"},
+	"mdcegyp":  {"egy"},
+	"mdctrans": {"egy"},
+	"mesokem":  {"cop"},
+	"metelko":  {"sl"},
+	"monoton":  {"el"},
+	"ndyuka":   {"djk"},
+	"nedis":    {"sl"},
+	"newfound": {"en-CA"},
+	"nicard":   {"oc"},
+	"njiva":    {"sl-rozaj"},
+	"nulik":    {"vo"},
+	"osojs":    {"sl-rozaj"},
+	"oxendict": {"en"},
+	"pahawh2":  {"mww", "hnj"},
+	"pahawh3":  {"mww", "hnj"},
+	"pahawh4":  {"mww", "hnj"},
+	"pamaka":   {"djk"},
+	"peano":    {"la"},
+	"pehoeji":  {"nan-Latn"},
+	"petr1708": {"ru"},
+	"pinyin":   {"zh-Latn", "bo-Latn"},
+	"polyton":  {"el"},
+	"provenc":  {"oc"},
+	"puter":    {"rm"},
+	"rigik":    {"vo"},
+	"rozaj":    {"sl"},
+	"rumgr":    {"rm"},
+	"sahidic":  {"cop"},
+	"saigon":   {"vi"},
+	"scotland": {"en"},
+	"scouse":   {"en"},
+	"simple":   {},
+	"solba":    {"sl-rozaj"},
+	"sotav":    {"kea"},
+	"spanglis": {"en", "es"},
+	"surmiran": {"rm"},
+	"sursilv":  {"rm"},
+	"sutsilv":  {"rm"},
+	"synnejyl": {"da"},
+	"tailo":    {"nan-Latn"},
+	"tarask":   {"be"},
+	"tongyong": {"zh-Latn"},
+	"tunumiit": {"kl"},
+	"uccor":    {"kw"},
+	"ucrcor":   {"kw"},
+	"ulster":   {"sco"},
+	"unifon":   {"en", "hup", "kyh", "tol", "yur"},
+	"vaidika":  {"sa"},
+	"valbadia": {"lld"},
+	"valencia": {"ca"},
+	"vallader": {"rm"},
+	"vecdruka": {"lv"},
+	"viennese": {"de"},
+	"vivaraup": {"oc"},
+	"wadegile": {"zh-Latn"},
+	"xsistemo": {"eo"},
+}
+
+// Type: extlang and their associated primary language prefixes from the
+// IANA Language Subtag Registry:
+// 
https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
+var iana_subtag_registry_extlangs = map[string][]string{ // key: lower-case extlang subtag; value: its registry prefixes
+	"aao": {"ar"},
+	"abh": {"ar"},
+	"abv": {"ar"},
+	"acm": {"ar"},
+	"acq": {"ar"},
+	"acw": {"ar"},
+	"acx": {"ar"},
+	"acy": {"ar"},
+	"adf": {"ar"},
+	"ads": {"sgn"},
+	"aeb": {"ar"},
+	"aec": {"ar"},
+	"aed": {"sgn"},
+	"aen": {"sgn"},
+	"afb": {"ar"},
+	"afg": {"sgn"},
+	"ajp": {"ar"},
+	"ajs": {"sgn"},
+	"apc": {"ar"},
+	"apd": {"ar"},
+	"arb": {"ar"},
+	"arq": {"ar"},
+	"ars": {"ar"},
+	"ary": {"ar"},
+	"arz": {"ar"},
+	"ase": {"sgn"},
+	"asf": {"sgn"},
+	"asp": {"sgn"},
+	"asq": {"sgn"},
+	"asw": {"sgn"},
+	"auz": {"ar"},
+	"avl": {"ar"},
+	"ayh": {"ar"},
+	"ayl": {"ar"},
+	"ayn": {"ar"},
+	"ayp": {"ar"},
+	"bbz": {"ar"},
+	"bfi": {"sgn"},
+	"bfk": {"sgn"},
+	"bjn": {"ms"},
+	"bog": {"sgn"},
+	"bqn": {"sgn"},
+	"bqy": {"sgn"},
+	"btj": {"ms"},
+	"bve": {"ms"},
+	"bvl": {"sgn"},
+	"bvu": {"ms"},
+	"bzs": {"sgn"},
+	"cdo": {"zh"},
+	"cds": {"sgn"},
+	"cjy": {"zh"},
+	"cmn": {"zh"},
+	"cnp": {"zh"},
+	"coa": {"ms"},
+	"cpx": {"zh"},
+	"csc": {"sgn"},
+	"csd": {"sgn"},
+	"cse": {"sgn"},
+	"csf": {"sgn"},
+	"csg": {"sgn"},
+	"csl": {"sgn"},
+	"csn": {"sgn"},
+	"csp": {"zh"},
+	"csq": {"sgn"},
+	"csr": {"sgn"},
+	"csx": {"sgn"},
+	"czh": {"zh"},
+	"czo": {"zh"},
+	"doq": {"sgn"},
+	"dse": {"sgn"},
+	"dsl": {"sgn"},
+	"dsz": {"sgn"},
+	"dup": {"ms"},
+	"ecs": {"sgn"},
+	"ehs": {"sgn"},
+	"esl": {"sgn"},
+	"esn": {"sgn"},
+	"eso": {"sgn"},
+	"eth": {"sgn"},
+	"fcs": {"sgn"},
+	"fse": {"sgn"},
+	"fsl": {"sgn"},
+	"fss": {"sgn"},
+	"gan": {"zh"},
+	"gds": {"sgn"},
+	"gom": {"kok"},
+	"gse": {"sgn"},
+	"gsg": {"sgn"},
+	"gsm": {"sgn"},
+	"gss": {"sgn"},
+	"gus": {"sgn"},
+	"hab": {"sgn"},
+	"haf": {"sgn"},
+	"hak": {"zh"},
+	"hds": {"sgn"},
+	"hji": {"ms"},
+	"hks": {"sgn"},
+	"hnm": {"zh"},
+	"hos": {"sgn"},
+	"hps": {"sgn"},
+	"hsh": {"sgn"},
+	"hsl": {"sgn"},
+	"hsn": {"zh"},
+	"icl": {"sgn"},
+	"iks": {"sgn"},
+	"ils": {"sgn"},
+	"inl": {"sgn"},
+	"ins": {"sgn"},
+	"ise": {"sgn"},
+	"isg": {"sgn"},
+	"isr": {"sgn"},
+	"jak": {"ms"},
+	"jax": {"ms"},
+	"jcs": {"sgn"},
+	"jhs": {"sgn"},
+	"jks": {"sgn"},
+	"jls": {"sgn"},
+	"jos": {"sgn"},
+	"jsl": {"sgn"},
+	"jus": {"sgn"},
+	"kgi": {"sgn"},
+	"knn": {"kok"},
+	"kvb": {"ms"},
+	"kvk": {"sgn"},
+	"kvr": {"ms"},
+	"kxd": {"ms"},
+	"lbs": {"sgn"},
+	"lce": {"ms"},
+	"lcf": {"ms"},
+	"lgs": {"sgn"},
+	"liw": {"ms"},
+	"lls": {"sgn"},
+	"lsb": {"sgn"},
+	"lsc": {"sgn"},
+	"lsg": {"sgn"},
+	"lsl": {"sgn"},
+	"lsn": {"sgn"},
+	"lso": {"sgn"},
+	"lsp": {"sgn"},
+	"lst": {"sgn"},
+	"lsv": {"sgn"},
+	"lsw": {"sgn"},
+	"lsy": {"sgn"},
+	"ltg": {"lv"},
+	"luh": {"zh"},
+	"lvs": {"lv"},
+	"lws": {"sgn"},
+	"lzh": {"zh"},
+	"max": {"ms"},
+	"mdl": {"sgn"},
+	"meo": {"ms"},
+	"mfa": {"ms"},
+	"mfb": {"ms"},
+	"mfs": {"sgn"},
+	"min": {"ms"},
+	"mnp": {"zh"},
+	"mqg": {"ms"},
+	"mre": {"sgn"},
+	"msd": {"sgn"},
+	"msi": {"ms"},
+	"msr": {"sgn"},
+	"mui": {"ms"},
+	"mzc": {"sgn"},
+	"mzg": {"sgn"},
+	"mzy": {"sgn"},
+	"nan": {"zh"},
+	"nbs": {"sgn"},
+	"ncs": {"sgn"},
+	"nsi": {"sgn"},
+	"nsl": {"sgn"},
+	"nsp": {"sgn"},
+	"nsr": {"sgn"},
+	"nzs": {"sgn"},
+	"okl": {"sgn"},
+	"orn": {"ms"},
+	"ors": {"ms"},
+	"pel": {"ms"},
+	"pga": {"ar"},
+	"pgz": {"sgn"},
+	"pks": {"sgn"},
+	"prl": {"sgn"},
+	"prz": {"sgn"},
+	"psc": {"sgn"},
+	"psd": {"sgn"},
+	"pse": {"ms"},
+	"psg": {"sgn"},
+	"psl": {"sgn"},
+	"pso": {"sgn"},
+	"psp": {"sgn"},
+	"psr": {"sgn"},
+	"pys": {"sgn"},
+	"rib": {"sgn"},
+	"rms": {"sgn"},
+	"rnb": {"sgn"},
+	"rsi": {"sgn"},
+	"rsl": {"sgn"},
+	"rsm": {"sgn"},
+	"rsn": {"sgn"},
+	"sdl": {"sgn"},
+	"sfb": {"sgn"},
+	"sfs": {"sgn"},
+	"sgg": {"sgn"},
+	"sgx": {"sgn"},
+	"shu": {"ar"},
+	"sjc": {"zh"},
+	"slf": {"sgn"},
+	"sls": {"sgn"},
+	"sqk": {"sgn"},
+	"sqs": {"sgn"},
+	"sqx": {"sgn"},
+	"ssh": {"ar"},
+	"ssp": {"sgn"},
+	"ssr": {"sgn"},
+	"svk": {"sgn"},
+	"swc": {"sw"},
+	"swh": {"sw"},
+	"swl": {"sgn"},
+	"syy": {"sgn"},
+	"szs": {"sgn"},
+	"tmw": {"ms"},
+	"tse": {"sgn"},
+	"tsm": {"sgn"},
+	"tsq": {"sgn"},
+	"tss": {"sgn"},
+	"tsy": {"sgn"},
+	"tza": {"sgn"},
+	"ugn": {"sgn"},
+	"ugy": {"sgn"},
+	"ukl": {"sgn"},
+	"uks": {"sgn"},
+	"urk": {"ms"},
+	"uzn": {"uz"},
+	"uzs": {"uz"},
+	"vgt": {"sgn"},
+	"vkk": {"ms"},
+	"vkt": {"ms"},
+	"vsi": {"sgn"},
+	"vsl": {"sgn"},
+	"vsv": {"sgn"},
+	"wbs": {"sgn"},
+	"wuu": {"zh"},
+	"xki": {"sgn"},
+	"xml": {"sgn"},
+	"xmm": {"ms"},
+	"xms": {"sgn"},
+	"yds": {"sgn"},
+	"ygs": {"sgn"},
+	"yhs": {"sgn"},
+	"ysl": {"sgn"},
+	"ysm": {"sgn"},
+	"yue": {"zh"},
+	"zib": {"sgn"},
+	"zlm": {"ms"},
+	"zmi": {"ms"},
+	"zsl": {"sgn"},
+	"zsm": {"ms"},
+}
diff --git a/translations/en/en.go b/translations/en/en.go
index 0bf4f7f9..ee7e30fa 100644
--- a/translations/en/en.go
+++ b/translations/en/en.go
@@ -1489,6 +1489,11 @@ func RegisterDefaultTranslations(v *validator.Validate, trans ut.Translator) (er
 			translation: "{0} must be a valid cve identifier",
 			override:    false,
 		},
+		{
+			tag:         "bcp47_strict_language_tag",
+			translation: "{0} must be a valid BCP 47 language tag",
+			override:    false,
+		},
 		{
 			tag:         "validateFn",
 			translation: "{0} must be a valid object",
diff --git a/translations/it/it.go b/translations/it/it.go
index ce41be07..419f1104 100644
--- a/translations/it/it.go
+++ b/translations/it/it.go
@@ -1213,6 +1213,11 @@ func RegisterDefaultTranslations(v *validator.Validate, trans ut.Translator) (er
 			translation: "{0} deve essere un'immagine valida",
 			override:    false,
 		},
+		{
+			tag:         "bcp47_strict_language_tag",
+			translation: "{0} deve essere uno specificatore di lingua BCP47 valido",
+			override:    false,
+		},
 	}
 
 	for _, t := range translations {
diff --git a/validator_test.go b/validator_test.go
index 8e969d30..e5f29b86 100644
--- a/validator_test.go
+++ b/validator_test.go
@@ -13174,6 +13174,7 @@ func TestBCP47LanguageTagValidation(t *testing.T) {
 		{"az-Cyrl-AZ", "bcp47_language_tag", true},
 		{"en-029", "bcp47_language_tag", true},
 		{"xog", "bcp47_language_tag", 
true},
+		{"i-klingon", "bcp47_language_tag", true},
 	}
 
 	validate := New()
@@ -13202,6 +13203,143 @@ func TestBCP47LanguageTagValidation(t *testing.T) {
 	}, "Bad field type int")
 }
 
+func TestBCP47StrictLanguageTagValidation(t *testing.T) {
+	tests := []struct {
+		value    string `validate:"bcp47_strict_language_tag"`
+		expected bool
+	}{
+		// VALID
+		//
+		{"en-US", true},
+		{"es", true},
+		{"az-Cyrl-AZ", true},
+		{"en-029", true},
+		{"xog", true},
+		{"i-klingon", true},
+		{"zh-min-nan", true},
+		{"x-foobar", true}, // private use only
+
+		// script
+		{"it-Aran", true},
+
+		// region
+		{"it-Aran-142", true},
+		{"it-Aran-IT", true},
+
+		// variant
+		{"frm-Aran-FR-1606nict", true},
+
+		// extension
+		{"frm-Aran-FR-a-1234567c", true},
+		{"frm-Aran-FR-b-ab", true},
+		{"frm-Aran-FR-a-12345678-12345678", true},
+
+		// privateuse
+		{"frm-Aran-FR-x-a", true},
+		{"frm-Aran-FR-x-1234567a", true},
+		{"frm-Aran-FR-x-1234567a-abcde", true},
+
+		// From RFC 5646, Appendix A.
+		//
+		// Simple language subtag:
+		{"de", true}, // German
+		{"fr", true}, // French
+		{"ja", true}, // Japanese
+		{"i-enochian", true}, // example of a grandfathered tag
+		// Language subtag plus Script subtag:
+		{"zh-Hant", true}, // Chinese written using the Traditional Chinese script
+		{"zh-Hans", true}, // Chinese written using the Simplified Chinese script
+		{"sr-Cyrl", true}, // Serbian written using the Cyrillic script
+		{"sr-Latn", true}, // Serbian written using the Latin script
+		// Extended language subtags and their primary language subtag counterparts:
+		{"zh-cmn-Hans-CN", true}, // Chinese, Mandarin, Simplified script, as used in China
+		{"cmn-Hans-CN", true}, // Mandarin Chinese, Simplified script, as used in China
+		{"zh-yue-HK", true}, // Chinese, Cantonese, as used in Hong Kong SAR
+		{"yue-HK", true}, // Cantonese Chinese, as used in Hong Kong SAR
+		// Language-Script-Region:
+		{"zh-Hans-CN", true}, // Chinese written using the Simplified script as used in mainland China
+		{"sr-Latn-RS", true}, // Serbian written using the Latin script as used in Serbia
+		// Language-Variant:
+		{"sl-rozaj", true}, // Resian dialect of Slovenian
+		{"sl-rozaj-biske", true}, // San Giorgio dialect of Resian dialect of Slovenian
+		{"sl-nedis", true}, // Nadiza dialect of Slovenian
+		// Language-Region-Variant:
+		{"de-CH-1901", true}, // German as used in Switzerland using the 1901 variant (orthography)
+		{"sl-IT-nedis", true}, // Slovenian as used in Italy, Nadiza dialect
+		// Language-Script-Region-Variant:
+		{"hy-Latn-IT-arevela", true}, // Eastern Armenian written in Latin script, as used in Italy
+		// Language-Region:
+		{"de-DE", true}, // German for Germany
+		{"en-US", true}, // English as used in the United States
+		{"es-419", true}, // Spanish appropriate for the Latin America and Caribbean region using the UN region code
+		// Private use subtags:
+		{"de-CH-x-phonebk", true}, // private use subtag
+		{"az-Arab-x-AZE-derbend", true}, // private use subtag
+		// Private use registry values:
+		{"x-whatever", true}, // private use using the singleton 'x'
+		{"qaa-Qaaa-QM-x-southern", true}, // all private tags
+		{"de-Qaaa", true}, // German, with a private script
+		{"sr-Latn-QM", true}, // Serbian, Latin script, private region
+		{"sr-Qaaa-RS", true}, // Serbian, private script, for Serbia
+
+		// INVALID
+		//
+		// language
+		{"English", false}, // 7 letters: a 5*8ALPHA language subtag, none of which are accepted
+		{"AmericanEnglish", false}, // too long
+		{"ESES", false}, // 4 chars are reserved for future use
+		{"ita", false}, // valid but not shortest ISO 639 code
+		{"en_GB", false}, // "_" is not a subtag separator, only "-" is
+		{"eng", false}, // valid but not shortest ISO 639 code
+		{"xfoobar", false}, // no "-" after "x": treated as a 5*8ALPHA language subtag, not private use
+		{"x-123456789", false}, // private use subtag too long (9 chars, max is 8)
+
+		// script
+		{"it-Aram", false}, // "Aram" is not a valid script
+
+		// region
+		{"it-Aran-ITA", false}, // "ITA" is not a valid ISO 3166-1 alpha-2 code
+		{"it-Aran-380", false}, // "380" is a valid UN M.49 region code, but it's not in the IANA language subtag registry
+
+		// variant
+		{"it-Aran-FR-1606nict", false}, // "1606nict" must be used with prefix "frm"
+
+		// extension
+		{"frm-Aran-FR-a-12345678a", false}, // too long
+		{"frm-Aran-FR-b-a", false}, // too short
+		{"frm-Aran-FR-a-12345678-12345678a", false}, // second extension too long
+
+		// privateuse
+		{"frm-Aran-FR-x-12345678a", false}, // too long
+		{"frm-Aran-FR-x-", false}, // too short
+	}
+
+	validate := New()
+
+	for i, test := range tests {
+		errs := validate.Var(test.value, "bcp47_strict_language_tag")
+
+		if test.expected {
+			if !IsEqual(errs, nil) {
+				t.Fatalf("'%s' should be valid (index %d). Error: %s", test.value, i, errs)
+			}
+		} else {
+			if IsEqual(errs, nil) {
+				t.Fatalf("'%s' should not be valid (index %d)", test.value, i)
+			} else {
+				val := getError(errs, "", "")
+				if val.Tag() != "bcp47_strict_language_tag" {
+					t.Fatalf("'%s' (index %d) failed with validator other than 'bcp47_strict_language_tag'. Error: %s", test.value, i, errs)
+				}
+			}
+		}
+	}
+
+	PanicMatches(t, func() {
+		_ = validate.Var(2, "bcp47_strict_language_tag")
+	}, "Bad field type int")
+}
+
 func TestBicIsoFormatValidation(t *testing.T) {
 	tests := []struct {
 		value string `validate:"bic"`