annotate gcc/go/gofrontend/go-encode-id.cc @ 131:84e7813d76e9

gcc-8.2
author mir3636
date Thu, 25 Oct 2018 07:37:49 +0900
parents 04ced10e8804
children 1830386684a0
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
111
kono
parents:
diff changeset
1 // go-encode-id.cc -- Go identifier encoding hooks
kono
parents:
diff changeset
2
kono
parents:
diff changeset
3 // Copyright 2016 The Go Authors. All rights reserved.
kono
parents:
diff changeset
4 // Use of this source code is governed by a BSD-style
kono
parents:
diff changeset
5 // license that can be found in the LICENSE file.
kono
parents:
diff changeset
6
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
7 #include "go-system.h"
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
8
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
9 #include "gogo.h"
111
kono
parents:
diff changeset
10 #include "go-location.h"
kono
parents:
diff changeset
11 #include "go-linemap.h"
kono
parents:
diff changeset
12 #include "go-encode-id.h"
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
13 #include "lex.h"
111
kono
parents:
diff changeset
14
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
// Return whether the character C must be encoded before it can be
// used in an assembler symbol name.  The only characters that may be
// used unchanged are the ASCII letters and digits, underscore, and
// dot; for every other character this returns true.  The range
// comparisons below rely on the letters being contiguous, as they are
// in ASCII.

static bool
char_needs_encoding(char c)
{
  if (c >= 'A' && c <= 'Z')
    return false;
  if (c >= 'a' && c <= 'z')
    return false;
  if (c >= '0' && c <= '9')
    return false;
  return c != '_' && c != '.';
}
kono
parents:
diff changeset
41
kono
parents:
diff changeset
42 // Return whether the identifier needs to be translated because it
kono
parents:
diff changeset
43 // contains non-ASCII characters.
kono
parents:
diff changeset
44
kono
parents:
diff changeset
45 bool
kono
parents:
diff changeset
46 go_id_needs_encoding(const std::string& str)
kono
parents:
diff changeset
47 {
kono
parents:
diff changeset
48 for (std::string::const_iterator p = str.begin();
kono
parents:
diff changeset
49 p != str.end();
kono
parents:
diff changeset
50 ++p)
kono
parents:
diff changeset
51 if (char_needs_encoding(*p))
kono
parents:
diff changeset
52 return true;
kono
parents:
diff changeset
53 return false;
kono
parents:
diff changeset
54 }
kono
parents:
diff changeset
55
kono
parents:
diff changeset
// Pull the next UTF-8 character out of P and store it in *PC.  Return
// the number of bytes read.
//
// A byte with the high bit clear is returned as is, with length 1.
// Otherwise the length is the number of leading one bits in the first
// byte; the remaining low bits of that byte supply the high bits of
// the result, and each following byte contributes its low six bits.
//
// No validation is done: the following bytes are not checked to be
// continuation bytes, and a stray continuation byte in the lead
// position yields length 1 with its low six bits as the value.  The
// caller must make sure that as many bytes as the lead byte announces
// are readable starting at P.

static size_t
fetch_utf8_char(const char* p, unsigned int* pc)
{
  const unsigned char lead = static_cast<unsigned char>(*p);
  if ((lead & 0x80) == 0)
    {
      *pc = lead;
      return 1;
    }

  // Count the leading one bits.  BITS is an unsigned char, so the
  // shifted-out bits are dropped and the loop runs at most 8 times.
  size_t len = 0;
  for (unsigned char bits = lead; (bits & 0x80) != 0; bits <<= 1)
    ++len;

  // The lead byte carries 7 - LEN payload bits.  For a lead byte of
  // 0xff LEN is 8, and 7 - LEN would wrap around to a huge shift
  // count (undefined behavior), so treat LEN >= 7 as "no payload
  // bits" explicitly.
  const unsigned int mask = len < 7 ? (1U << (7 - len)) - 1 : 0U;

  unsigned int rc = lead & mask;
  for (size_t i = 1; i < len; i++)
    {
      // Go through unsigned char so the result does not depend on
      // whether plain char is signed.
      const unsigned int u = static_cast<unsigned char>(p[i]);
      rc <<= 6;
      rc |= u & 0x3f;
    }
  *pc = rc;
  return len;
}
kono
parents:
diff changeset
84
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
85 // Encode an identifier using ASCII characters. The encoding is
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
86 // described in detail near the end of the long comment at the start
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
87 // of names.cc. Short version: translate all non-ASCII-alphanumeric
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
88 // characters into ..uXXXX or ..UXXXXXXXX.
111
kono
parents:
diff changeset
89
kono
parents:
diff changeset
90 std::string
kono
parents:
diff changeset
91 go_encode_id(const std::string &id)
kono
parents:
diff changeset
92 {
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
93 if (Lex::is_invalid_identifier(id))
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
94 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
95 go_assert(saw_errors());
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
96 return id;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
97 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
98
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
99 // The encoding is only unambiguous if the input string does not
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
100 // contain ..u or ..U.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
101 go_assert(id.find("..u") == std::string::npos);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
102 go_assert(id.find("..U") == std::string::npos);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
103
111
kono
parents:
diff changeset
104 std::string ret;
kono
parents:
diff changeset
105 const char* p = id.c_str();
kono
parents:
diff changeset
106 const char* pend = p + id.length();
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
107
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
108 // A leading ".0" is a space introduced before a mangled type name
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
109 // that starts with a 'u' or 'U', to avoid confusion with the
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
110 // mangling used here. We don't need a leading ".0", and we don't
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
111 // want symbols that start with '.', so remove it.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
112 if (p[0] == '.' && p[1] == '0')
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
113 p += 2;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
114
111
kono
parents:
diff changeset
115 while (p < pend)
kono
parents:
diff changeset
116 {
kono
parents:
diff changeset
117 unsigned int c;
kono
parents:
diff changeset
118 size_t len = fetch_utf8_char(p, &c);
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
119 if (len == 1)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
120 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
121 // At this point we should only be seeing alphanumerics or
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
122 // underscore or dot.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
123 go_assert(!char_needs_encoding(c));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
124 ret += c;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
125 }
111
kono
parents:
diff changeset
126 else
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
127 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
128 char buf[16];
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
129 if (c < 0x10000)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
130 snprintf(buf, sizeof buf, "..u%04x", c);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
131 else
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
132 snprintf(buf, sizeof buf, "..U%08x", c);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
133
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
134 // We don't want a symbol to start with '.', so add a prefix
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
135 // if needed.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
136 if (ret.empty())
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
137 ret += '_';
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
138
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
139 ret += buf;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
140 }
111
kono
parents:
diff changeset
141 p += len;
kono
parents:
diff changeset
142 }
kono
parents:
diff changeset
143 return ret;
kono
parents:
diff changeset
144 }
kono
parents:
diff changeset
145
kono
parents:
diff changeset
146 std::string
kono
parents:
diff changeset
147 go_selectively_encode_id(const std::string &id)
kono
parents:
diff changeset
148 {
kono
parents:
diff changeset
149 if (go_id_needs_encoding(id))
kono
parents:
diff changeset
150 return go_encode_id(id);
kono
parents:
diff changeset
151 return std::string();
kono
parents:
diff changeset
152 }
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
153
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
154 // Encode a struct field tag. This is only used when we need to
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
155 // create a type descriptor for an anonymous struct type with field
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
156 // tags. This mangling is applied before go_encode_id. We skip
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
157 // alphanumerics and underscore, replace every other single byte
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
158 // character with .xNN, and leave larger UTF-8 characters for
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
159 // go_encode_id.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
160
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
161 std::string
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
162 go_mangle_struct_tag(const std::string& tag)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
163 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
164 std::string ret;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
165 const char* p = tag.c_str();
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
166 const char* pend = p + tag.length();
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
167 while (p < pend)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
168 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
169 unsigned int c;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
170 size_t len = fetch_utf8_char(p, &c);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
171 if (len > 1)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
172 ret.append(p, len);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
173 else if (!char_needs_encoding(c) && c != '.')
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
174 ret += c;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
175 else
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
176 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
177 char buf[16];
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
178 snprintf(buf, sizeof buf, ".x%02x", c);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
179 ret += buf;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
180 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
181 p += len;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
182 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
183 return ret;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
184 }