Source file
src/mime/encodedword.go
Documentation: mime
1
2
3
4
5 package mime
6
7 import (
8 "bytes"
9 "encoding/base64"
10 "errors"
11 "fmt"
12 "io"
13 "strings"
14 "unicode"
15 "unicode/utf8"
16 )
17
18
// A WordEncoder is an RFC 2047 encoded-word encoder. Its underlying byte is
// the encoding letter written into the header of every encoded-word.
type WordEncoder byte

// Supported encoded-word encodings.
const (
	// BEncoding selects the base64-based "b" encoding.
	BEncoding WordEncoder = 'b'
	// QEncoding selects the quoted-printable-like "q" encoding.
	QEncoding WordEncoder = 'q'
)
27
// errInvalidWord is returned when the input is not a well-formed
// RFC 2047 encoded-word.
var errInvalidWord = errors.New("mime: invalid RFC 2047 encoded-word")
31
32
33
34
35 func (e WordEncoder) Encode(charset, s string) string {
36 if !needsEncoding(s) {
37 return s
38 }
39 return e.encodeWord(charset, s)
40 }
41
// needsEncoding reports whether s contains any rune other than a tab or a
// printable ASCII character (' ' through '~').
func needsEncoding(s string) bool {
	for _, r := range s {
		switch {
		case r == '\t':
			// Tabs are the one control character allowed through.
			continue
		case r < ' ', r > '~':
			return true
		}
	}
	return false
}
50
51
52 func (e WordEncoder) encodeWord(charset, s string) string {
53 var buf strings.Builder
54
55
56
57 buf.Grow(48)
58
59 e.openWord(&buf, charset)
60 if e == BEncoding {
61 e.bEncode(&buf, charset, s)
62 } else {
63 e.qEncode(&buf, charset, s)
64 }
65 closeWord(&buf)
66
67 return buf.String()
68 }
69
const (
	// maxEncodedWordLen is the maximum length of a whole encoded-word,
	// framing included.
	maxEncodedWordLen = 75
	// maxContentLen is the room left for encoded content once the
	// "=?UTF-8?q?" header and the "?=" footer have been accounted for.
	maxContentLen = maxEncodedWordLen - len("=?UTF-8?q?"+"?=")
)

// maxBase64Len is the largest number of raw bytes whose base64 form fits
// within maxContentLen characters.
var maxBase64Len = base64.StdEncoding.DecodedLen(maxContentLen)
80
81
82 func (e WordEncoder) bEncode(buf *strings.Builder, charset, s string) {
83 w := base64.NewEncoder(base64.StdEncoding, buf)
84
85
86 if !isUTF8(charset) || base64.StdEncoding.EncodedLen(len(s)) <= maxContentLen {
87 io.WriteString(w, s)
88 w.Close()
89 return
90 }
91
92 var currentLen, last, runeLen int
93 for i := 0; i < len(s); i += runeLen {
94
95
96 _, runeLen = utf8.DecodeRuneInString(s[i:])
97
98 if currentLen+runeLen <= maxBase64Len {
99 currentLen += runeLen
100 } else {
101 io.WriteString(w, s[last:i])
102 w.Close()
103 e.splitWord(buf, charset)
104 last = i
105 currentLen = runeLen
106 }
107 }
108 io.WriteString(w, s[last:])
109 w.Close()
110 }
111
112
113
114 func (e WordEncoder) qEncode(buf *strings.Builder, charset, s string) {
115
116 if !isUTF8(charset) {
117 writeQString(buf, s)
118 return
119 }
120
121 var currentLen, runeLen int
122 for i := 0; i < len(s); i += runeLen {
123 b := s[i]
124
125
126 var encLen int
127 if b >= ' ' && b <= '~' && b != '=' && b != '?' && b != '_' {
128 runeLen, encLen = 1, 1
129 } else {
130 _, runeLen = utf8.DecodeRuneInString(s[i:])
131 encLen = 3 * runeLen
132 }
133
134 if currentLen+encLen > maxContentLen {
135 e.splitWord(buf, charset)
136 currentLen = 0
137 }
138 writeQString(buf, s[i:i+runeLen])
139 currentLen += encLen
140 }
141 }
142
143
144 func writeQString(buf *strings.Builder, s string) {
145 for i := 0; i < len(s); i++ {
146 switch b := s[i]; {
147 case b == ' ':
148 buf.WriteByte('_')
149 case b >= '!' && b <= '~' && b != '=' && b != '?' && b != '_':
150 buf.WriteByte(b)
151 default:
152 buf.WriteByte('=')
153 buf.WriteByte(upperhex[b>>4])
154 buf.WriteByte(upperhex[b&0x0f])
155 }
156 }
157 }
158
159
160 func (e WordEncoder) openWord(buf *strings.Builder, charset string) {
161 buf.WriteString("=?")
162 buf.WriteString(charset)
163 buf.WriteByte('?')
164 buf.WriteByte(byte(e))
165 buf.WriteByte('?')
166 }
167
168
// closeWord writes the "?=" terminator of an encoded-word to buf.
func closeWord(buf *strings.Builder) {
	buf.WriteByte('?')
	buf.WriteByte('=')
}
172
173
174 func (e WordEncoder) splitWord(buf *strings.Builder, charset string) {
175 closeWord(buf)
176 buf.WriteByte(' ')
177 e.openWord(buf, charset)
178 }
179
// isUTF8 reports whether charset names UTF-8, compared case-insensitively.
func isUTF8(charset string) bool {
	const name = "UTF-8"
	return strings.EqualFold(name, charset)
}
183
// upperhex maps a nibble value (0-15) to its upper-case hexadecimal digit.
const upperhex = "0123456789" + "ABCDEF"
185
186
// A WordDecoder decodes RFC 2047 encoded-words, either on their own
// (Decode) or embedded in a header value (DecodeHeader).
type WordDecoder struct {
	// CharsetReader, if non-nil, is called for every charset other than
	// utf-8, iso-8859-1 and us-ascii, which are handled directly.
	// It receives the lower-cased charset name and the decoded bytes, and
	// returns a reader whose output is copied into the result.
	// When CharsetReader is nil, such charsets cause an error.
	CharsetReader func(charset string, input io.Reader) (io.Reader, error)
}
196
197
198 func (d *WordDecoder) Decode(word string) (string, error) {
199
200
201 if len(word) < 8 || !strings.HasPrefix(word, "=?") || !strings.HasSuffix(word, "?=") || strings.Count(word, "?") != 4 {
202 return "", errInvalidWord
203 }
204 word = word[2 : len(word)-2]
205
206
207 charset, text, _ := strings.Cut(word, "?")
208 if charset == "" {
209 return "", errInvalidWord
210 }
211 encoding, text, _ := strings.Cut(text, "?")
212 if len(encoding) != 1 {
213 return "", errInvalidWord
214 }
215
216 content, err := decode(encoding[0], text)
217 if err != nil {
218 return "", err
219 }
220
221 var buf strings.Builder
222 if err := d.convert(&buf, charset, content); err != nil {
223 return "", err
224 }
225 return buf.String(), nil
226 }
227
228
229
230 func (d *WordDecoder) DecodeHeader(header string) (string, error) {
231
232 i := strings.Index(header, "=?")
233 if i == -1 {
234 return header, nil
235 }
236
237 var buf strings.Builder
238
239 buf.WriteString(header[:i])
240 header = header[i:]
241
242 betweenWords := false
243 for {
244 start := strings.Index(header, "=?")
245 if start == -1 {
246 break
247 }
248 cur := start + len("=?")
249
250 i := strings.Index(header[cur:], "?")
251 if i == -1 {
252 break
253 }
254 charset := header[cur : cur+i]
255 cur += i + len("?")
256
257 if len(header) < cur+len("Q??=") {
258 break
259 }
260 encoding := header[cur]
261 cur++
262
263 if header[cur] != '?' {
264 break
265 }
266 cur++
267
268 j := strings.Index(header[cur:], "?=")
269 if j == -1 {
270 break
271 }
272 text := header[cur : cur+j]
273 end := cur + j + len("?=")
274
275 content, err := decode(encoding, text)
276 if err != nil {
277 betweenWords = false
278 buf.WriteString(header[:start+2])
279 header = header[start+2:]
280 continue
281 }
282
283
284
285 if start > 0 && (!betweenWords || hasNonWhitespace(header[:start])) {
286 buf.WriteString(header[:start])
287 }
288
289 if err := d.convert(&buf, charset, content); err != nil {
290 return "", err
291 }
292
293 header = header[end:]
294 betweenWords = true
295 }
296
297 if len(header) > 0 {
298 buf.WriteString(header)
299 }
300
301 return buf.String(), nil
302 }
303
304 func decode(encoding byte, text string) ([]byte, error) {
305 switch encoding {
306 case 'B', 'b':
307 return base64.StdEncoding.DecodeString(text)
308 case 'Q', 'q':
309 return qDecode(text)
310 default:
311 return nil, errInvalidWord
312 }
313 }
314
315 func (d *WordDecoder) convert(buf *strings.Builder, charset string, content []byte) error {
316 switch {
317 case strings.EqualFold("utf-8", charset):
318 buf.Write(content)
319 case strings.EqualFold("iso-8859-1", charset):
320 for _, c := range content {
321 buf.WriteRune(rune(c))
322 }
323 case strings.EqualFold("us-ascii", charset):
324 for _, c := range content {
325 if c >= utf8.RuneSelf {
326 buf.WriteRune(unicode.ReplacementChar)
327 } else {
328 buf.WriteByte(c)
329 }
330 }
331 default:
332 if d.CharsetReader == nil {
333 return fmt.Errorf("mime: unhandled charset %q", charset)
334 }
335 r, err := d.CharsetReader(strings.ToLower(charset), bytes.NewReader(content))
336 if err != nil {
337 return err
338 }
339 if _, err = io.Copy(buf, r); err != nil {
340 return err
341 }
342 }
343 return nil
344 }
345
346
347
// hasNonWhitespace reports whether s contains any rune other than space,
// tab, line feed or carriage return.
func hasNonWhitespace(s string) bool {
	for _, r := range s {
		if r != ' ' && r != '\t' && r != '\n' && r != '\r' {
			return true
		}
	}
	return false
}
360
361
362 func qDecode(s string) ([]byte, error) {
363 dec := make([]byte, len(s))
364 n := 0
365 for i := 0; i < len(s); i++ {
366 switch c := s[i]; {
367 case c == '_':
368 dec[n] = ' '
369 case c == '=':
370 if i+2 >= len(s) {
371 return nil, errInvalidWord
372 }
373 b, err := readHexByte(s[i+1], s[i+2])
374 if err != nil {
375 return nil, err
376 }
377 dec[n] = b
378 i += 2
379 case (c <= '~' && c >= ' ') || c == '\n' || c == '\r' || c == '\t':
380 dec[n] = c
381 default:
382 return nil, errInvalidWord
383 }
384 n++
385 }
386
387 return dec[:n], nil
388 }
389
390
391 func readHexByte(a, b byte) (byte, error) {
392 var hb, lb byte
393 var err error
394 if hb, err = fromHex(a); err != nil {
395 return 0, err
396 }
397 if lb, err = fromHex(b); err != nil {
398 return 0, err
399 }
400 return hb<<4 | lb, nil
401 }
402
// fromHex returns the value of the hexadecimal digit b. Both upper-case and
// lower-case letters are accepted; any other byte is an error.
func fromHex(b byte) (byte, error) {
	if '0' <= b && b <= '9' {
		return b - '0', nil
	}
	if 'A' <= b && b <= 'F' {
		return b - 'A' + 10, nil
	}
	if 'a' <= b && b <= 'f' {
		return b - 'a' + 10, nil
	}
	return 0, fmt.Errorf("mime: invalid hex byte %#02x", b)
}
415
View as plain text