mirror of
https://github.com/strongdm/comply
synced 2024-11-25 00:54:54 +00:00
197 lines
5.2 KiB
Go
197 lines
5.2 KiB
Go
|
// Copyright 2019 The Go Authors. All rights reserved.
|
||
|
// Use of this source code is governed by a BSD-style
|
||
|
// license that can be found in the LICENSE file.
|
||
|
|
||
|
// Package strs provides string manipulation functionality specific to protobuf.
|
||
|
package strs
|
||
|
|
||
|
import (
|
||
|
"go/token"
|
||
|
"strings"
|
||
|
"unicode"
|
||
|
"unicode/utf8"
|
||
|
|
||
|
"google.golang.org/protobuf/internal/flags"
|
||
|
"google.golang.org/protobuf/reflect/protoreflect"
|
||
|
)
|
||
|
|
||
|
// EnforceUTF8 reports whether to enforce strict UTF-8 validation.
|
||
|
func EnforceUTF8(fd protoreflect.FieldDescriptor) bool {
|
||
|
if flags.ProtoLegacy {
|
||
|
if fd, ok := fd.(interface{ EnforceUTF8() bool }); ok {
|
||
|
return fd.EnforceUTF8()
|
||
|
}
|
||
|
}
|
||
|
return fd.Syntax() == protoreflect.Proto3
|
||
|
}
|
||
|
|
||
|
// GoCamelCase camel-cases a protobuf name for use as a Go identifier.
|
||
|
//
|
||
|
// If there is an interior underscore followed by a lower case letter,
|
||
|
// drop the underscore and convert the letter to upper case.
|
||
|
func GoCamelCase(s string) string {
|
||
|
// Invariant: if the next letter is lower case, it must be converted
|
||
|
// to upper case.
|
||
|
// That is, we process a word at a time, where words are marked by _ or
|
||
|
// upper case letter. Digits are treated as words.
|
||
|
var b []byte
|
||
|
for i := 0; i < len(s); i++ {
|
||
|
c := s[i]
|
||
|
switch {
|
||
|
case c == '.' && i+1 < len(s) && isASCIILower(s[i+1]):
|
||
|
// Skip over '.' in ".{{lowercase}}".
|
||
|
case c == '.':
|
||
|
b = append(b, '_') // convert '.' to '_'
|
||
|
case c == '_' && (i == 0 || s[i-1] == '.'):
|
||
|
// Convert initial '_' to ensure we start with a capital letter.
|
||
|
// Do the same for '_' after '.' to match historic behavior.
|
||
|
b = append(b, 'X') // convert '_' to 'X'
|
||
|
case c == '_' && i+1 < len(s) && isASCIILower(s[i+1]):
|
||
|
// Skip over '_' in "_{{lowercase}}".
|
||
|
case isASCIIDigit(c):
|
||
|
b = append(b, c)
|
||
|
default:
|
||
|
// Assume we have a letter now - if not, it's a bogus identifier.
|
||
|
// The next word is a sequence of characters that must start upper case.
|
||
|
if isASCIILower(c) {
|
||
|
c -= 'a' - 'A' // convert lowercase to uppercase
|
||
|
}
|
||
|
b = append(b, c)
|
||
|
|
||
|
// Accept lower case sequence that follows.
|
||
|
for ; i+1 < len(s) && isASCIILower(s[i+1]); i++ {
|
||
|
b = append(b, s[i+1])
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
return string(b)
|
||
|
}
|
||
|
|
||
|
// GoSanitized converts a string to a valid Go identifier.
//
// Every rune that is neither a Unicode letter nor a Unicode decimal digit is
// replaced with '_'. If the result is a Go keyword, or does not begin with a
// letter (this includes the empty string and a leading '_'), a '_' is
// prepended.
func GoSanitized(s string) string {
	// Sanitize the input to the set of valid characters, which must be '_',
	// a Unicode letter (category L), or a Unicode decimal digit (category Nd).
	s = strings.Map(func(r rune) rune {
		if unicode.IsLetter(r) || unicode.IsDigit(r) {
			return r
		}
		return '_'
	}, s)

	// Prepend '_' in the event of a Go keyword conflict or if
	// the identifier is invalid (does not start in the Unicode L category).
	// For an empty string DecodeRuneInString yields utf8.RuneError, which is
	// not a letter, so the result is "_".
	first, _ := utf8.DecodeRuneInString(s)
	if token.Lookup(s).IsKeyword() || !unicode.IsLetter(first) {
		return "_" + s
	}
	return s
}
|
||
|
|
||
|
// JSONCamelCase converts a snake_case identifier to a camelCase identifier,
|
||
|
// according to the protobuf JSON specification.
|
||
|
func JSONCamelCase(s string) string {
|
||
|
var b []byte
|
||
|
var wasUnderscore bool
|
||
|
for i := 0; i < len(s); i++ { // proto identifiers are always ASCII
|
||
|
c := s[i]
|
||
|
if c != '_' {
|
||
|
if wasUnderscore && isASCIILower(c) {
|
||
|
c -= 'a' - 'A' // convert to uppercase
|
||
|
}
|
||
|
b = append(b, c)
|
||
|
}
|
||
|
wasUnderscore = c == '_'
|
||
|
}
|
||
|
return string(b)
|
||
|
}
|
||
|
|
||
|
// JSONSnakeCase converts a camelCase identifier to a snake_case identifier,
|
||
|
// according to the protobuf JSON specification.
|
||
|
func JSONSnakeCase(s string) string {
|
||
|
var b []byte
|
||
|
for i := 0; i < len(s); i++ { // proto identifiers are always ASCII
|
||
|
c := s[i]
|
||
|
if isASCIIUpper(c) {
|
||
|
b = append(b, '_')
|
||
|
c += 'a' - 'A' // convert to lowercase
|
||
|
}
|
||
|
b = append(b, c)
|
||
|
}
|
||
|
return string(b)
|
||
|
}
|
||
|
|
||
|
// MapEntryName derives the name of the map entry message given the field name.
// See protoc v3.8.0: src/google/protobuf/descriptor.cc:254-276,6057
//
// Underscores are dropped, the first byte and every byte that follows an
// underscore is upper-cased if it is an ASCII lower case letter, and "Entry"
// is appended. The string is processed byte by byte and only ASCII letters
// are case-converted, so any non-ASCII bytes pass through unmodified rather
// than being truncated to a single byte.
func MapEntryName(s string) string {
	b := make([]byte, 0, len(s)+len("Entry"))
	upperNext := true
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch {
		case c == '_':
			upperNext = true
		case upperNext:
			if 'a' <= c && c <= 'z' {
				c -= 'a' - 'A' // convert to uppercase
			}
			b = append(b, c)
			upperNext = false
		default:
			b = append(b, c)
		}
	}
	b = append(b, "Entry"...)
	return string(b)
}
|
||
|
|
||
|
// EnumValueName derives the camel-cased enum value name.
// See protoc v3.8.0: src/google/protobuf/descriptor.cc:297-313
//
// Underscores are dropped. The first byte and every byte that follows an
// underscore is upper-cased; every other byte is lower-cased. The string is
// processed byte by byte and only ASCII letters are case-converted, so any
// non-ASCII bytes pass through unmodified rather than being truncated to a
// single byte.
func EnumValueName(s string) string {
	b := make([]byte, 0, len(s))
	upperNext := true
	for i := 0; i < len(s); i++ {
		c := s[i]
		if c == '_' {
			upperNext = true
			continue
		}
		switch {
		case upperNext && 'a' <= c && c <= 'z':
			c -= 'a' - 'A' // convert to uppercase
		case !upperNext && 'A' <= c && c <= 'Z':
			c += 'a' - 'A' // convert to lowercase
		}
		b = append(b, c)
		upperNext = false
	}
	return string(b)
}
|
||
|
|
||
|
// TrimEnumPrefix trims the enum name prefix from an enum value name,
// where the prefix is all lowercase without underscores.
// See protoc v3.8.0: src/google/protobuf/descriptor.cc:330-375
//
// Underscores in s are skipped while matching, and bytes of s are compared
// to prefix after lower-casing ASCII letters only. If the prefix does not
// match, or if nothing but underscores would remain after trimming, the
// original s is returned unchanged.
func TrimEnumPrefix(s, prefix string) string {
	s0 := s // original input
	for len(s) > 0 && len(prefix) > 0 {
		if s[0] == '_' {
			s = s[1:]
			continue
		}
		// Lower-case ASCII only. Treating a raw byte as a rune would
		// case-fold the bytes of multi-byte UTF-8 sequences and break
		// matching of otherwise identical non-ASCII prefixes.
		c := s[0]
		if 'A' <= c && c <= 'Z' {
			c += 'a' - 'A'
		}
		if c != prefix[0] {
			return s0 // no prefix match
		}
		s, prefix = s[1:], prefix[1:]
	}
	if len(prefix) > 0 {
		return s0 // no prefix match
	}
	s = strings.TrimLeft(s, "_")
	if len(s) == 0 {
		return s0 // avoid returning empty string
	}
	return s
}
|
||
|
|
||
|
// isASCIILower reports whether c is an ASCII lower case letter ('a'-'z').
func isASCIILower(c byte) bool {
	return 'a' <= c && c <= 'z'
}
|
||
|
// isASCIIUpper reports whether c is an ASCII upper case letter ('A'-'Z').
func isASCIIUpper(c byte) bool {
	return 'A' <= c && c <= 'Z'
}
|
||
|
// isASCIIDigit reports whether c is an ASCII decimal digit ('0'-'9').
func isASCIIDigit(c byte) bool {
	return '0' <= c && c <= '9'
}
|