...

Source file src/net/mail/message.go

Documentation: net/mail

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  /*
     6  Package mail implements parsing of mail messages.
     7  
     8  For the most part, this package follows the syntax as specified by RFC 5322 and
     9  extended by RFC 6532.
    10  Notable divergences:
    11    - Obsolete address formats are not parsed, including addresses with
    12      embedded route information.
    13    - The full range of spacing (the CFWS syntax element) is not supported,
    14      such as breaking addresses across lines.
    15    - No unicode normalization is performed.
    16    - The special characters ()[]:;@\, are allowed to appear unquoted in names.
    17    - A leading From line is permitted, as in mbox format (RFC 4155).
    18  */
    19  package mail
    20  
    21  import (
    22  	"bufio"
    23  	"errors"
    24  	"fmt"
    25  	"io"
    26  	"log"
    27  	"mime"
    28  	"net/textproto"
    29  	"strings"
    30  	"sync"
    31  	"time"
    32  	"unicode/utf8"
    33  )
    34  
    35  var debug = debugT(false)
    36  
    37  type debugT bool
    38  
    39  func (d debugT) Printf(format string, args ...any) {
    40  	if d {
    41  		log.Printf(format, args...)
    42  	}
    43  }
    44  
// A Message represents a parsed mail message.
type Message struct {
	// Header holds the parsed header fields, keyed by canonical field name.
	Header Header
	// Body yields the message content that follows the header.
	// ReadMessage sets it to the buffered reader it used for the header.
	Body io.Reader
}
    50  
    51  // ReadMessage reads a message from r.
    52  // The headers are parsed, and the body of the message will be available
    53  // for reading from msg.Body.
    54  func ReadMessage(r io.Reader) (msg *Message, err error) {
    55  	tp := textproto.NewReader(bufio.NewReader(r))
    56  
    57  	hdr, err := readHeader(tp)
    58  	if err != nil && (err != io.EOF || len(hdr) == 0) {
    59  		return nil, err
    60  	}
    61  
    62  	return &Message{
    63  		Header: Header(hdr),
    64  		Body:   tp.R,
    65  	}, nil
    66  }
    67  
    68  // readHeader reads the message headers from r.
    69  // This is like textproto.ReadMIMEHeader, but doesn't validate.
    70  // The fix for issue #53188 tightened up net/textproto to enforce
    71  // restrictions of RFC 7230.
    72  // This package implements RFC 5322, which does not have those restrictions.
    73  // This function copies the relevant code from net/textproto,
    74  // simplified for RFC 5322.
    75  func readHeader(r *textproto.Reader) (map[string][]string, error) {
    76  	m := make(map[string][]string)
    77  
    78  	// The first line cannot start with a leading space.
    79  	if buf, err := r.R.Peek(1); err == nil && (buf[0] == ' ' || buf[0] == '\t') {
    80  		line, err := r.ReadLine()
    81  		if err != nil {
    82  			return m, err
    83  		}
    84  		return m, errors.New("malformed initial line: " + line)
    85  	}
    86  
    87  	for {
    88  		kv, err := r.ReadContinuedLine()
    89  		if kv == "" {
    90  			return m, err
    91  		}
    92  
    93  		// Key ends at first colon.
    94  		k, v, ok := strings.Cut(kv, ":")
    95  		if !ok {
    96  			return m, errors.New("malformed header line: " + kv)
    97  		}
    98  		key := textproto.CanonicalMIMEHeaderKey(k)
    99  
   100  		// Permit empty key, because that is what we did in the past.
   101  		if key == "" {
   102  			continue
   103  		}
   104  
   105  		// Skip initial spaces in value.
   106  		value := strings.TrimLeft(v, " \t")
   107  
   108  		m[key] = append(m[key], value)
   109  
   110  		if err != nil {
   111  			return m, err
   112  		}
   113  	}
   114  }
   115  
   116  // Layouts suitable for passing to time.Parse.
   117  // These are tried in order.
   118  var (
   119  	dateLayoutsBuildOnce sync.Once
   120  	dateLayouts          []string
   121  )
   122  
   123  func buildDateLayouts() {
   124  	// Generate layouts based on RFC 5322, section 3.3.
   125  
   126  	dows := [...]string{"", "Mon, "}   // day-of-week
   127  	days := [...]string{"2", "02"}     // day = 1*2DIGIT
   128  	years := [...]string{"2006", "06"} // year = 4*DIGIT / 2*DIGIT
   129  	seconds := [...]string{":05", ""}  // second
   130  	// "-0700 (MST)" is not in RFC 5322, but is common.
   131  	zones := [...]string{"-0700", "MST", "UT"} // zone = (("+" / "-") 4DIGIT) / "UT" / "GMT" / ...
   132  
   133  	for _, dow := range dows {
   134  		for _, day := range days {
   135  			for _, year := range years {
   136  				for _, second := range seconds {
   137  					for _, zone := range zones {
   138  						s := dow + day + " Jan " + year + " 15:04" + second + " " + zone
   139  						dateLayouts = append(dateLayouts, s)
   140  					}
   141  				}
   142  			}
   143  		}
   144  	}
   145  }
   146  
// ParseDate parses an RFC 5322 date string.
//
// CRLF pairs are removed before parsing, and a CR that is not followed by LF
// is rejected. The input is split after the time zone; the text after the
// zone is scanned for parenthesized comments, and an unterminated comment is
// an error. The date part is then matched against each entry of dateLayouts
// in order and the first successful parse is returned. If none matches,
// the error "mail: header could not be parsed" is returned.
func ParseDate(date string) (time.Time, error) {
	// The layout table is built lazily, once per process.
	dateLayoutsBuildOnce.Do(buildDateLayouts)
	// CR and LF must match and are tolerated anywhere in the date field.
	date = strings.ReplaceAll(date, "\r\n", "")
	if strings.Contains(date, "\r") {
		return time.Time{}, errors.New("mail: header has a CR without LF")
	}
	// Re-using some addrParser methods which support obsolete text, i.e. non-printable ASCII
	p := addrParser{date, nil}
	p.skipSpace()

	// RFC 5322: zone = (FWS ( "+" / "-" ) 4DIGIT) / obs-zone
	// zone length is always 5 chars unless obsolete (obs-zone)
	if ind := strings.IndexAny(p.s, "+-"); ind != -1 && len(p.s) >= ind+5 {
		// Numeric zone: date keeps everything through the sign and the
		// four characters after it; p.s keeps the remainder.
		date = p.s[:ind+5]
		p.s = p.s[ind+5:]
	} else {
		ind := strings.Index(p.s, "T")
		if ind == 0 {
			// In this case we have the following date formats:
			// * Thu, 20 Nov 1997 09:55:06 MDT
			// * Thu, 20 Nov 1997 09:55:06 MDT (MDT)
			// * Thu, 20 Nov 1997 09:55:06 MDT (This comment)
			// The leading "T" belongs to the day name, so look for the
			// next one and convert its offset back to an index into p.s.
			ind = strings.Index(p.s[1:], "T")
			if ind != -1 {
				ind++
			}
		}

		if ind != -1 && len(p.s) >= ind+5 {
			// The last letter T of the obsolete time zone is checked when no standard time zone is found.
			// If T is misplaced, the date to parse is garbage.
			date = p.s[:ind+1]
			p.s = p.s[ind+1:]
		}
		// Otherwise date is left untouched and p.s still holds the whole
		// (space-trimmed) input.
	}
	// Whatever remains in p.s may contain comments; they must be balanced.
	if !p.skipCFWS() {
		return time.Time{}, errors.New("mail: misformatted parenthetical comment")
	}
	for _, layout := range dateLayouts {
		t, err := time.Parse(layout, date)
		if err == nil {
			return t, nil
		}
	}
	return time.Time{}, errors.New("mail: header could not be parsed")
}
   195  
   196  // A Header represents the key-value pairs in a mail message header.
   197  type Header map[string][]string
   198  
   199  // Get gets the first value associated with the given key.
   200  // It is case insensitive; CanonicalMIMEHeaderKey is used
   201  // to canonicalize the provided key.
   202  // If there are no values associated with the key, Get returns "".
   203  // To access multiple values of a key, or to use non-canonical keys,
   204  // access the map directly.
   205  func (h Header) Get(key string) string {
   206  	return textproto.MIMEHeader(h).Get(key)
   207  }
   208  
// ErrHeaderNotPresent is returned by Header.Date and Header.AddressList
// when Get yields an empty string for the requested header field.
var ErrHeaderNotPresent = errors.New("mail: header not in message")
   210  
   211  // Date parses the Date header field.
   212  func (h Header) Date() (time.Time, error) {
   213  	hdr := h.Get("Date")
   214  	if hdr == "" {
   215  		return time.Time{}, ErrHeaderNotPresent
   216  	}
   217  	return ParseDate(hdr)
   218  }
   219  
   220  // AddressList parses the named header field as a list of addresses.
   221  func (h Header) AddressList(key string) ([]*Address, error) {
   222  	hdr := h.Get(key)
   223  	if hdr == "" {
   224  		return nil, ErrHeaderNotPresent
   225  	}
   226  	return ParseAddressList(hdr)
   227  }
   228  
// Address represents a single mail address.
// An address such as "Barry Gibbs <bg@example.com>" is represented
// as Address{Name: "Barry Gibbs", Address: "bg@example.com"}.
type Address struct {
	Name    string // Proper name (display name); may be empty.
	Address string // user@domain, without the enclosing angle brackets.
}
   236  
   237  // ParseAddress parses a single RFC 5322 address, e.g. "Barry Gibbs <bg@example.com>"
   238  func ParseAddress(address string) (*Address, error) {
   239  	return (&addrParser{s: address}).parseSingleAddress()
   240  }
   241  
   242  // ParseAddressList parses the given string as a list of addresses.
   243  func ParseAddressList(list string) ([]*Address, error) {
   244  	return (&addrParser{s: list}).parseAddressList()
   245  }
   246  
// An AddressParser is an RFC 5322 address parser.
// Its Parse and ParseList methods behave like ParseAddress and
// ParseAddressList, except that WordDecoder is passed on to the parser.
type AddressParser struct {
	// WordDecoder optionally specifies a decoder for RFC 2047 encoded-words.
	// When nil, the package's default decoder (rfc2047Decoder) is used.
	WordDecoder *mime.WordDecoder
}
   252  
   253  // Parse parses a single RFC 5322 address of the
   254  // form "Gogh Fir <gf@example.com>" or "foo@example.com".
   255  func (p *AddressParser) Parse(address string) (*Address, error) {
   256  	return (&addrParser{s: address, dec: p.WordDecoder}).parseSingleAddress()
   257  }
   258  
   259  // ParseList parses the given string as a list of comma-separated addresses
   260  // of the form "Gogh Fir <gf@example.com>" or "foo@example.com".
   261  func (p *AddressParser) ParseList(list string) ([]*Address, error) {
   262  	return (&addrParser{s: list, dec: p.WordDecoder}).parseAddressList()
   263  }
   264  
   265  // String formats the address as a valid RFC 5322 address.
   266  // If the address's name contains non-ASCII characters
   267  // the name will be rendered according to RFC 2047.
   268  func (a *Address) String() string {
   269  	// Format address local@domain
   270  	at := strings.LastIndex(a.Address, "@")
   271  	var local, domain string
   272  	if at < 0 {
   273  		// This is a malformed address ("@" is required in addr-spec);
   274  		// treat the whole address as local-part.
   275  		local = a.Address
   276  	} else {
   277  		local, domain = a.Address[:at], a.Address[at+1:]
   278  	}
   279  
   280  	// Add quotes if needed
   281  	quoteLocal := false
   282  	for i, r := range local {
   283  		if isAtext(r, false) {
   284  			continue
   285  		}
   286  		if r == '.' {
   287  			// Dots are okay if they are surrounded by atext.
   288  			// We only need to check that the previous byte is
   289  			// not a dot, and this isn't the end of the string.
   290  			if i > 0 && local[i-1] != '.' && i < len(local)-1 {
   291  				continue
   292  			}
   293  		}
   294  		quoteLocal = true
   295  		break
   296  	}
   297  	if quoteLocal {
   298  		local = quoteString(local)
   299  
   300  	}
   301  
   302  	s := "<" + local + "@" + domain + ">"
   303  
   304  	if a.Name == "" {
   305  		return s
   306  	}
   307  
   308  	// If every character is printable ASCII, quoting is simple.
   309  	allPrintable := true
   310  	for _, r := range a.Name {
   311  		// isWSP here should actually be isFWS,
   312  		// but we don't support folding yet.
   313  		if !isVchar(r) && !isWSP(r) || isMultibyte(r) {
   314  			allPrintable = false
   315  			break
   316  		}
   317  	}
   318  	if allPrintable {
   319  		return quoteString(a.Name) + " " + s
   320  	}
   321  
   322  	// Text in an encoded-word in a display-name must not contain certain
   323  	// characters like quotes or parentheses (see RFC 2047 section 5.3).
   324  	// When this is the case encode the name using base64 encoding.
   325  	if strings.ContainsAny(a.Name, "\"#$%&'(),.:;<>@[]^`{|}~") {
   326  		return mime.BEncoding.Encode("utf-8", a.Name) + " " + s
   327  	}
   328  	return mime.QEncoding.Encode("utf-8", a.Name) + " " + s
   329  }
   330  
// addrParser is the parsing state shared by the address and date parsers.
// Its methods consume input from the front of s as they recognize it.
type addrParser struct {
	s   string            // input that has not been consumed yet
	dec *mime.WordDecoder // may be nil
}
   335  
   336  func (p *addrParser) parseAddressList() ([]*Address, error) {
   337  	var list []*Address
   338  	for {
   339  		p.skipSpace()
   340  
   341  		// allow skipping empty entries (RFC5322 obs-addr-list)
   342  		if p.consume(',') {
   343  			continue
   344  		}
   345  
   346  		addrs, err := p.parseAddress(true)
   347  		if err != nil {
   348  			return nil, err
   349  		}
   350  		list = append(list, addrs...)
   351  
   352  		if !p.skipCFWS() {
   353  			return nil, errors.New("mail: misformatted parenthetical comment")
   354  		}
   355  		if p.empty() {
   356  			break
   357  		}
   358  		if p.peek() != ',' {
   359  			return nil, errors.New("mail: expected comma")
   360  		}
   361  
   362  		// Skip empty entries for obs-addr-list.
   363  		for p.consume(',') {
   364  			p.skipSpace()
   365  		}
   366  		if p.empty() {
   367  			break
   368  		}
   369  	}
   370  	return list, nil
   371  }
   372  
   373  func (p *addrParser) parseSingleAddress() (*Address, error) {
   374  	addrs, err := p.parseAddress(true)
   375  	if err != nil {
   376  		return nil, err
   377  	}
   378  	if !p.skipCFWS() {
   379  		return nil, errors.New("mail: misformatted parenthetical comment")
   380  	}
   381  	if !p.empty() {
   382  		return nil, fmt.Errorf("mail: expected single address, got %q", p.s)
   383  	}
   384  	if len(addrs) == 0 {
   385  		return nil, errors.New("mail: empty group")
   386  	}
   387  	if len(addrs) > 1 {
   388  		return nil, errors.New("mail: group with multiple addresses")
   389  	}
   390  	return addrs[0], nil
   391  }
   392  
// parseAddress parses a single RFC 5322 address at the start of p.
//
// A mailbox yields a one-element slice. When handleGroup is true, a group
// ("display-name: member, member;") is also accepted and yields its members,
// which may be none. When handleGroup is false a ':' after the display name
// is not consumed, so the input fails for lack of an angle-addr.
func (p *addrParser) parseAddress(handleGroup bool) ([]*Address, error) {
	debug.Printf("parseAddress: %q", p.s)
	p.skipSpace()
	if p.empty() {
		return nil, errors.New("mail: no address")
	}

	// address = mailbox / group
	// mailbox = name-addr / addr-spec
	// group = display-name ":" [group-list] ";" [CFWS]

	// addr-spec has a more restricted grammar than name-addr,
	// so try parsing it first, and fallback to name-addr.
	// TODO(dsymonds): Is this really correct?
	spec, err := p.consumeAddrSpec()
	if err == nil {
		// Bare addr-spec. A comment directly after it, as in
		// "user@host (Full Name)", is taken as the display name.
		var displayName string
		p.skipSpace()
		if !p.empty() && p.peek() == '(' {
			displayName, err = p.consumeDisplayNameComment()
			if err != nil {
				return nil, err
			}
		}

		return []*Address{{
			Name:    displayName,
			Address: spec,
		}}, err
	}
	// consumeAddrSpec restores p on failure, so parsing restarts from the
	// same position below.
	debug.Printf("parseAddress: not an addr-spec: %v", err)
	debug.Printf("parseAddress: state is now %q", p.s)

	// display-name
	var displayName string
	if p.peek() != '<' {
		displayName, err = p.consumePhrase()
		if err != nil {
			return nil, err
		}
	}
	debug.Printf("parseAddress: displayName=%q", displayName)

	p.skipSpace()
	if handleGroup {
		// A colon after the display name introduces a group.
		if p.consume(':') {
			return p.consumeGroupList()
		}
	}
	// angle-addr = "<" addr-spec ">"
	if !p.consume('<') {
		// No angle-addr follows; pick the error message by looking at
		// whether the phrase could have been the start of an addr-spec.
		atext := true
		for _, r := range displayName {
			if !isAtext(r, true) {
				atext = false
				break
			}
		}
		if atext {
			// The input is like "foo.bar"; it's possible the input
			// meant to be "foo.bar@domain", or "foo.bar <...>".
			return nil, errors.New("mail: missing '@' or angle-addr")
		}
		// The input is like "Full Name", which couldn't possibly be a
		// valid email address if followed by "@domain"; the input
		// likely meant to be "Full Name <...>".
		return nil, errors.New("mail: no angle-addr")
	}
	spec, err = p.consumeAddrSpec()
	if err != nil {
		return nil, err
	}
	if !p.consume('>') {
		return nil, errors.New("mail: unclosed angle-addr")
	}
	debug.Printf("parseAddress: spec=%q", spec)

	return []*Address{{
		Name:    displayName,
		Address: spec,
	}}, nil
}
   476  
   477  func (p *addrParser) consumeGroupList() ([]*Address, error) {
   478  	var group []*Address
   479  	// handle empty group.
   480  	p.skipSpace()
   481  	if p.consume(';') {
   482  		if !p.skipCFWS() {
   483  			return nil, errors.New("mail: misformatted parenthetical comment")
   484  		}
   485  		return group, nil
   486  	}
   487  
   488  	for {
   489  		p.skipSpace()
   490  		// embedded groups not allowed.
   491  		addrs, err := p.parseAddress(false)
   492  		if err != nil {
   493  			return nil, err
   494  		}
   495  		group = append(group, addrs...)
   496  
   497  		if !p.skipCFWS() {
   498  			return nil, errors.New("mail: misformatted parenthetical comment")
   499  		}
   500  		if p.consume(';') {
   501  			if !p.skipCFWS() {
   502  				return nil, errors.New("mail: misformatted parenthetical comment")
   503  			}
   504  			break
   505  		}
   506  		if !p.consume(',') {
   507  			return nil, errors.New("mail: expected comma")
   508  		}
   509  	}
   510  	return group, nil
   511  }
   512  
   513  // consumeAddrSpec parses a single RFC 5322 addr-spec at the start of p.
   514  func (p *addrParser) consumeAddrSpec() (spec string, err error) {
   515  	debug.Printf("consumeAddrSpec: %q", p.s)
   516  
   517  	orig := *p
   518  	defer func() {
   519  		if err != nil {
   520  			*p = orig
   521  		}
   522  	}()
   523  
   524  	// local-part = dot-atom / quoted-string
   525  	var localPart string
   526  	p.skipSpace()
   527  	if p.empty() {
   528  		return "", errors.New("mail: no addr-spec")
   529  	}
   530  	if p.peek() == '"' {
   531  		// quoted-string
   532  		debug.Printf("consumeAddrSpec: parsing quoted-string")
   533  		localPart, err = p.consumeQuotedString()
   534  		if localPart == "" {
   535  			err = errors.New("mail: empty quoted string in addr-spec")
   536  		}
   537  	} else {
   538  		// dot-atom
   539  		debug.Printf("consumeAddrSpec: parsing dot-atom")
   540  		localPart, err = p.consumeAtom(true, false)
   541  	}
   542  	if err != nil {
   543  		debug.Printf("consumeAddrSpec: failed: %v", err)
   544  		return "", err
   545  	}
   546  
   547  	if !p.consume('@') {
   548  		return "", errors.New("mail: missing @ in addr-spec")
   549  	}
   550  
   551  	// domain = dot-atom / domain-literal
   552  	var domain string
   553  	p.skipSpace()
   554  	if p.empty() {
   555  		return "", errors.New("mail: no domain in addr-spec")
   556  	}
   557  	// TODO(dsymonds): Handle domain-literal
   558  	domain, err = p.consumeAtom(true, false)
   559  	if err != nil {
   560  		return "", err
   561  	}
   562  
   563  	return localPart + "@" + domain, nil
   564  }
   565  
// consumePhrase parses the RFC 5322 phrase at the start of p.
//
// Words are consumed until the input is exhausted or a word fails to parse.
// The result is the words joined by single spaces, except that consecutive
// RFC 2047 encoded-words are concatenated without a separator. An error is
// returned only when not even one word could be consumed.
func (p *addrParser) consumePhrase() (phrase string, err error) {
	debug.Printf("consumePhrase: [%s]", p.s)
	// phrase = 1*word
	var words []string
	// isPrevEncoded records whether the previously consumed word was an
	// encoded-word, so that adjacent encoded-words can be merged.
	var isPrevEncoded bool
	for {
		// obs-phrase allows CFWS after one word
		if len(words) > 0 {
			if !p.skipCFWS() {
				return "", errors.New("mail: misformatted parenthetical comment")
			}
		}
		// word = atom / quoted-string
		var word string
		p.skipSpace()
		if p.empty() {
			break
		}
		isEncoded := false
		if p.peek() == '"' {
			// quoted-string
			word, err = p.consumeQuotedString()
		} else {
			// atom
			// We actually parse dot-atom here to be more permissive
			// than what RFC 5322 specifies.
			word, err = p.consumeAtom(true, true)
			if err == nil {
				word, isEncoded, err = p.decodeRFC2047Word(word)
			}
		}

		// The first word that fails to parse ends the phrase.
		if err != nil {
			break
		}
		debug.Printf("consumePhrase: consumed %q", word)
		if isPrevEncoded && isEncoded {
			// Adjacent encoded-words are joined without a space.
			words[len(words)-1] += word
		} else {
			words = append(words, word)
		}
		isPrevEncoded = isEncoded
	}
	// Ignore any error if we got at least one word.
	if err != nil && len(words) == 0 {
		debug.Printf("consumePhrase: hit err: %v", err)
		return "", fmt.Errorf("mail: missing word in phrase: %v", err)
	}
	phrase = strings.Join(words, " ")
	return phrase, nil
}
   618  
// consumeQuotedString parses the quoted string at the start of p.
//
// The caller must have checked that p.s begins with '"'. The returned
// string is the content between the quotes with the backslash of every
// quoted-pair removed. On success p.s is advanced past the closing quote;
// on error p.s is left unchanged.
func (p *addrParser) consumeQuotedString() (qs string, err error) {
	// Assume first byte is '"'.
	i := 1
	qsb := make([]rune, 0, 10)

	// escaped is true while the previous rune was an unescaped backslash.
	escaped := false

Loop:
	for {
		r, size := utf8.DecodeRuneInString(p.s[i:])

		// The order of the cases matters: end of input and invalid UTF-8
		// are checked first, then a pending escape takes precedence over
		// the normal meaning of the rune.
		switch {
		case size == 0:
			// Ran out of input before the closing quote.
			return "", errors.New("mail: unclosed quoted-string")

		case size == 1 && r == utf8.RuneError:
			return "", fmt.Errorf("mail: invalid utf-8 in quoted-string: %q", p.s)

		case escaped:
			//  quoted-pair = ("\" (VCHAR / WSP))

			if !isVchar(r) && !isWSP(r) {
				return "", fmt.Errorf("mail: bad character in quoted-string: %q", r)
			}

			qsb = append(qsb, r)
			escaped = false

		case isQtext(r) || isWSP(r):
			// qtext (printable US-ASCII excluding " and \), or
			// FWS (almost; we're ignoring CRLF)
			qsb = append(qsb, r)

		case r == '"':
			// Unescaped closing quote: i is left pointing at it.
			break Loop

		case r == '\\':
			escaped = true

		default:
			return "", fmt.Errorf("mail: bad character in quoted-string: %q", r)

		}

		i += size
	}
	// Skip the closing quote as well.
	p.s = p.s[i+1:]
	return string(qsb), nil
}
   669  
   670  // consumeAtom parses an RFC 5322 atom at the start of p.
   671  // If dot is true, consumeAtom parses an RFC 5322 dot-atom instead.
   672  // If permissive is true, consumeAtom will not fail on:
   673  // - leading/trailing/double dots in the atom (see golang.org/issue/4938)
   674  func (p *addrParser) consumeAtom(dot bool, permissive bool) (atom string, err error) {
   675  	i := 0
   676  
   677  Loop:
   678  	for {
   679  		r, size := utf8.DecodeRuneInString(p.s[i:])
   680  		switch {
   681  		case size == 1 && r == utf8.RuneError:
   682  			return "", fmt.Errorf("mail: invalid utf-8 in address: %q", p.s)
   683  
   684  		case size == 0 || !isAtext(r, dot):
   685  			break Loop
   686  
   687  		default:
   688  			i += size
   689  
   690  		}
   691  	}
   692  
   693  	if i == 0 {
   694  		return "", errors.New("mail: invalid string")
   695  	}
   696  	atom, p.s = p.s[:i], p.s[i:]
   697  	if !permissive {
   698  		if strings.HasPrefix(atom, ".") {
   699  			return "", errors.New("mail: leading dot in atom")
   700  		}
   701  		if strings.Contains(atom, "..") {
   702  			return "", errors.New("mail: double dot in atom")
   703  		}
   704  		if strings.HasSuffix(atom, ".") {
   705  			return "", errors.New("mail: trailing dot in atom")
   706  		}
   707  	}
   708  	return atom, nil
   709  }
   710  
   711  func (p *addrParser) consumeDisplayNameComment() (string, error) {
   712  	if !p.consume('(') {
   713  		return "", errors.New("mail: comment does not start with (")
   714  	}
   715  	comment, ok := p.consumeComment()
   716  	if !ok {
   717  		return "", errors.New("mail: misformatted parenthetical comment")
   718  	}
   719  
   720  	// TODO(stapelberg): parse quoted-string within comment
   721  	words := strings.FieldsFunc(comment, func(r rune) bool { return r == ' ' || r == '\t' })
   722  	for idx, word := range words {
   723  		decoded, isEncoded, err := p.decodeRFC2047Word(word)
   724  		if err != nil {
   725  			return "", err
   726  		}
   727  		if isEncoded {
   728  			words[idx] = decoded
   729  		}
   730  	}
   731  
   732  	return strings.Join(words, " "), nil
   733  }
   734  
   735  func (p *addrParser) consume(c byte) bool {
   736  	if p.empty() || p.peek() != c {
   737  		return false
   738  	}
   739  	p.s = p.s[1:]
   740  	return true
   741  }
   742  
   743  // skipSpace skips the leading space and tab characters.
   744  func (p *addrParser) skipSpace() {
   745  	p.s = strings.TrimLeft(p.s, " \t")
   746  }
   747  
// peek returns the next unconsumed byte without consuming it.
// It indexes p.s directly, so the caller must make sure p is not empty.
func (p *addrParser) peek() byte {
	return p.s[0]
}

// empty reports whether all input has been consumed.
func (p *addrParser) empty() bool {
	return p.len() == 0
}

// len returns the number of bytes of input that remain.
func (p *addrParser) len() int {
	return len(p.s)
}
   759  
   760  // skipCFWS skips CFWS as defined in RFC5322.
   761  func (p *addrParser) skipCFWS() bool {
   762  	p.skipSpace()
   763  
   764  	for {
   765  		if !p.consume('(') {
   766  			break
   767  		}
   768  
   769  		if _, ok := p.consumeComment(); !ok {
   770  			return false
   771  		}
   772  
   773  		p.skipSpace()
   774  	}
   775  
   776  	return true
   777  }
   778  
   779  func (p *addrParser) consumeComment() (string, bool) {
   780  	// '(' already consumed.
   781  	depth := 1
   782  
   783  	var comment string
   784  	for {
   785  		if p.empty() || depth == 0 {
   786  			break
   787  		}
   788  
   789  		if p.peek() == '\\' && p.len() > 1 {
   790  			p.s = p.s[1:]
   791  		} else if p.peek() == '(' {
   792  			depth++
   793  		} else if p.peek() == ')' {
   794  			depth--
   795  		}
   796  		if depth > 0 {
   797  			comment += p.s[:1]
   798  		}
   799  		p.s = p.s[1:]
   800  	}
   801  
   802  	return comment, depth == 0
   803  }
   804  
// decodeRFC2047Word tries to decode s as an RFC 2047 encoded-word, using
// p.dec or, when that is nil, the package's rfc2047Decoder.
//
// It returns:
//   - the decoded text, true, nil when s decodes successfully;
//   - s, true, and the error when decoding failed because the charset
//     reader rejected the charset (or no charset reader was set);
//   - s, false, nil for any other decoding failure, which is ignored.
func (p *addrParser) decodeRFC2047Word(s string) (word string, isEncoded bool, err error) {
	dec := p.dec
	if dec == nil {
		dec = &rfc2047Decoder
	}

	// Substitute our own CharsetReader function so that we can tell
	// whether an error from the Decode method was due to the
	// CharsetReader (meaning the charset is invalid).
	// We used to look for the charsetError type in the error result,
	// but that behaves badly with CharsetReaders other than the
	// one in rfc2047Decoder.
	// adec is a copy, so the caller's decoder is not modified.
	adec := *dec
	charsetReaderError := false
	adec.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
		if dec.CharsetReader == nil {
			charsetReaderError = true
			return nil, charsetError(charset)
		}
		r, err := dec.CharsetReader(charset, input)
		if err != nil {
			charsetReaderError = true
		}
		return r, err
	}
	word, err = adec.Decode(s)
	if err == nil {
		return word, true, nil
	}

	// If the error came from the character set reader
	// (meaning the character set itself is invalid
	// but the decoding worked fine until then),
	// return the original text and the error,
	// with isEncoded=true.
	if charsetReaderError {
		return s, true, err
	}

	// Ignore invalid RFC 2047 encoded-word errors.
	return s, false, nil
}
   847  
// rfc2047Decoder is the decoder used when an addrParser has no decoder of
// its own. Its CharsetReader always fails with a charsetError naming the
// requested charset.
var rfc2047Decoder = mime.WordDecoder{
	CharsetReader: func(charset string, input io.Reader) (io.Reader, error) {
		return nil, charsetError(charset)
	},
}
   853  
   854  type charsetError string
   855  
   856  func (e charsetError) Error() string {
   857  	return fmt.Sprintf("charset not supported: %q", string(e))
   858  }
   859  
   860  // isAtext reports whether r is an RFC 5322 atext character.
   861  // If dot is true, period is included.
   862  func isAtext(r rune, dot bool) bool {
   863  	switch r {
   864  	case '.':
   865  		return dot
   866  
   867  	// RFC 5322 3.2.3. specials
   868  	case '(', ')', '<', '>', '[', ']', ':', ';', '@', '\\', ',', '"': // RFC 5322 3.2.3. specials
   869  		return false
   870  	}
   871  	return isVchar(r)
   872  }
   873  
   874  // isQtext reports whether r is an RFC 5322 qtext character.
   875  func isQtext(r rune) bool {
   876  	// Printable US-ASCII, excluding backslash or quote.
   877  	if r == '\\' || r == '"' {
   878  		return false
   879  	}
   880  	return isVchar(r)
   881  }
   882  
   883  // quoteString renders a string as an RFC 5322 quoted-string.
   884  func quoteString(s string) string {
   885  	var b strings.Builder
   886  	b.WriteByte('"')
   887  	for _, r := range s {
   888  		if isQtext(r) || isWSP(r) {
   889  			b.WriteRune(r)
   890  		} else if isVchar(r) {
   891  			b.WriteByte('\\')
   892  			b.WriteRune(r)
   893  		}
   894  	}
   895  	b.WriteByte('"')
   896  	return b.String()
   897  }
   898  
   899  // isVchar reports whether r is an RFC 5322 VCHAR character.
   900  func isVchar(r rune) bool {
   901  	// Visible (printing) characters.
   902  	return '!' <= r && r <= '~' || isMultibyte(r)
   903  }
   904  
   905  // isMultibyte reports whether r is a multi-byte UTF-8 character
   906  // as supported by RFC 6532.
   907  func isMultibyte(r rune) bool {
   908  	return r >= utf8.RuneSelf
   909  }
   910  
   911  // isWSP reports whether r is a WSP (white space).
   912  // WSP is a space or horizontal tab (RFC 5234 Appendix B).
   913  func isWSP(r rune) bool {
   914  	return r == ' ' || r == '\t'
   915  }
   916  

View as plain text