reader.go

Documentation: net/textproto

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package textproto
     6  
     7  import (
     8  	"bufio"
     9  	"bytes"
    10  	"errors"
    11  	"fmt"
    12  	"io"
    13  	"math"
    14  	"strconv"
    15  	"strings"
    16  	"sync"
    17  	_ "unsafe" // for linkname
    18  )
    19  
// errMessageTooLarge is returned by readLineSlice, readContinuedLineSlice
// and readMIMEHeader when the input exceeds a caller-supplied limit.
//
// TODO: This should be a distinguishable error (ErrMessageTooLarge)
// to allow mime/multipart to detect it.
var errMessageTooLarge = errors.New("message too large")
    23  
// A Reader implements convenience methods for reading requests
// or responses from a text protocol network connection.
type Reader struct {
	R   *bufio.Reader // buffered source that every method reads from
	dot *dotReader    // reader handed out by DotReader, or nil; drained by closeDot
	buf []byte        // a re-usable buffer for readContinuedLineSlice
}
    31  
    32  // NewReader returns a new [Reader] reading from r.
    33  //
    34  // To avoid denial of service attacks, the provided [bufio.Reader]
    35  // should be reading from an [io.LimitReader] or similar Reader to bound
    36  // the size of responses.
    37  func NewReader(r *bufio.Reader) *Reader {
    38  	return &Reader{R: r}
    39  }
    40  
    41  // ReadLine reads a single line from r,
    42  // eliding the final \n or \r\n from the returned string.
    43  func (r *Reader) ReadLine() (string, error) {
    44  	line, err := r.readLineSlice(-1)
    45  	return string(line), err
    46  }
    47  
    48  // ReadLineBytes is like [Reader.ReadLine] but returns a []byte instead of a string.
    49  func (r *Reader) ReadLineBytes() ([]byte, error) {
    50  	line, err := r.readLineSlice(-1)
    51  	if line != nil {
    52  		line = bytes.Clone(line)
    53  	}
    54  	return line, err
    55  }
    56  
    57  // readLineSlice reads a single line from r,
    58  // up to lim bytes long (or unlimited if lim is less than 0),
    59  // eliding the final \r or \r\n from the returned string.
    60  func (r *Reader) readLineSlice(lim int64) ([]byte, error) {
    61  	r.closeDot()
    62  	var line []byte
    63  	for {
    64  		l, more, err := r.R.ReadLine()
    65  		if err != nil {
    66  			return nil, err
    67  		}
    68  		if lim >= 0 && int64(len(line))+int64(len(l)) > lim {
    69  			return nil, errMessageTooLarge
    70  		}
    71  		// Avoid the copy if the first call produced a full line.
    72  		if line == nil && !more {
    73  			return l, nil
    74  		}
    75  		line = append(line, l...)
    76  		if !more {
    77  			break
    78  		}
    79  	}
    80  	return line, nil
    81  }
    82  
    83  // ReadContinuedLine reads a possibly continued line from r,
    84  // eliding the final trailing ASCII white space.
    85  // Lines after the first are considered continuations if they
    86  // begin with a space or tab character. In the returned data,
    87  // continuation lines are separated from the previous line
    88  // only by a single space: the newline and leading white space
    89  // are removed.
    90  //
    91  // For example, consider this input:
    92  //
    93  //	Line 1
    94  //	  continued...
    95  //	Line 2
    96  //
    97  // The first call to ReadContinuedLine will return "Line 1 continued..."
    98  // and the second will return "Line 2".
    99  //
   100  // Empty lines are never continued.
   101  func (r *Reader) ReadContinuedLine() (string, error) {
   102  	line, err := r.readContinuedLineSlice(-1, noValidation)
   103  	return string(line), err
   104  }
   105  
   106  // trim returns s with leading and trailing spaces and tabs removed.
   107  // It does not assume Unicode or UTF-8.
   108  func trim(s []byte) []byte {
   109  	i := 0
   110  	for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
   111  		i++
   112  	}
   113  	n := len(s)
   114  	for n > i && (s[n-1] == ' ' || s[n-1] == '\t') {
   115  		n--
   116  	}
   117  	return s[i:n]
   118  }
   119  
   120  // ReadContinuedLineBytes is like [Reader.ReadContinuedLine] but
   121  // returns a []byte instead of a string.
   122  func (r *Reader) ReadContinuedLineBytes() ([]byte, error) {
   123  	line, err := r.readContinuedLineSlice(-1, noValidation)
   124  	if line != nil {
   125  		line = bytes.Clone(line)
   126  	}
   127  	return line, err
   128  }
   129  
   130  // readContinuedLineSlice reads continued lines from the reader buffer,
   131  // returning a byte slice with all lines. The validateFirstLine function
   132  // is run on the first read line, and if it returns an error then this
   133  // error is returned from readContinuedLineSlice.
   134  // It reads up to lim bytes of data (or unlimited if lim is less than 0).
   135  func (r *Reader) readContinuedLineSlice(lim int64, validateFirstLine func([]byte) error) ([]byte, error) {
   136  	if validateFirstLine == nil {
   137  		return nil, fmt.Errorf("missing validateFirstLine func")
   138  	}
   139  
   140  	// Read the first line.
   141  	line, err := r.readLineSlice(lim)
   142  	if err != nil {
   143  		return nil, err
   144  	}
   145  	if len(line) == 0 { // blank line - no continuation
   146  		return line, nil
   147  	}
   148  
   149  	if err := validateFirstLine(line); err != nil {
   150  		return nil, err
   151  	}
   152  
   153  	// Optimistically assume that we have started to buffer the next line
   154  	// and it starts with an ASCII letter (the next header key), or a blank
   155  	// line, so we can avoid copying that buffered data around in memory
   156  	// and skipping over non-existent whitespace.
   157  	if r.R.Buffered() > 1 {
   158  		peek, _ := r.R.Peek(2)
   159  		if len(peek) > 0 && (isASCIILetter(peek[0]) || peek[0] == '\n') ||
   160  			len(peek) == 2 && peek[0] == '\r' && peek[1] == '\n' {
   161  			return trim(line), nil
   162  		}
   163  	}
   164  
   165  	// ReadByte or the next readLineSlice will flush the read buffer;
   166  	// copy the slice into buf.
   167  	r.buf = append(r.buf[:0], trim(line)...)
   168  
   169  	if lim < 0 {
   170  		lim = math.MaxInt64
   171  	}
   172  	lim -= int64(len(r.buf))
   173  
   174  	// Read continuation lines.
   175  	for r.skipSpace() > 0 {
   176  		r.buf = append(r.buf, ' ')
   177  		if int64(len(r.buf)) >= lim {
   178  			return nil, errMessageTooLarge
   179  		}
   180  		line, err := r.readLineSlice(lim - int64(len(r.buf)))
   181  		if err != nil {
   182  			break
   183  		}
   184  		r.buf = append(r.buf, trim(line)...)
   185  	}
   186  	return r.buf, nil
   187  }
   188  
   189  // skipSpace skips R over all spaces and returns the number of bytes skipped.
   190  func (r *Reader) skipSpace() int {
   191  	n := 0
   192  	for {
   193  		c, err := r.R.ReadByte()
   194  		if err != nil {
   195  			// Bufio will keep err until next read.
   196  			break
   197  		}
   198  		if c != ' ' && c != '\t' {
   199  			r.R.UnreadByte()
   200  			break
   201  		}
   202  		n++
   203  	}
   204  	return n
   205  }
   206  
   207  func (r *Reader) readCodeLine(expectCode int) (code int, continued bool, message string, err error) {
   208  	line, err := r.ReadLine()
   209  	if err != nil {
   210  		return
   211  	}
   212  	return parseCodeLine(line, expectCode)
   213  }
   214  
   215  func parseCodeLine(line string, expectCode int) (code int, continued bool, message string, err error) {
   216  	if len(line) < 4 || line[3] != ' ' && line[3] != '-' {
   217  		err = ProtocolError("short response: " + line)
   218  		return
   219  	}
   220  	continued = line[3] == '-'
   221  	code, err = strconv.Atoi(line[0:3])
   222  	if err != nil || code < 100 {
   223  		err = ProtocolError("invalid response code: " + line)
   224  		return
   225  	}
   226  	message = line[4:]
   227  	if 1 <= expectCode && expectCode < 10 && code/100 != expectCode ||
   228  		10 <= expectCode && expectCode < 100 && code/10 != expectCode ||
   229  		100 <= expectCode && expectCode < 1000 && code != expectCode {
   230  		err = &Error{code, message}
   231  	}
   232  	return
   233  }
   234  
   235  // ReadCodeLine reads a response code line of the form
   236  //
   237  //	code message
   238  //
   239  // where code is a three-digit status code and the message
   240  // extends to the rest of the line. An example of such a line is:
   241  //
   242  //	220 plan9.bell-labs.com ESMTP
   243  //
   244  // If the prefix of the status does not match the digits in expectCode,
   245  // ReadCodeLine returns with err set to &Error{code, message}.
   246  // For example, if expectCode is 31, an error will be returned if
   247  // the status is not in the range [310,319].
   248  //
   249  // If the response is multi-line, ReadCodeLine returns an error.
   250  //
   251  // An expectCode <= 0 disables the check of the status code.
   252  func (r *Reader) ReadCodeLine(expectCode int) (code int, message string, err error) {
   253  	code, continued, message, err := r.readCodeLine(expectCode)
   254  	if err == nil && continued {
   255  		err = ProtocolError("unexpected multi-line response: " + message)
   256  	}
   257  	return
   258  }
   259  
   260  // ReadResponse reads a multi-line response of the form:
   261  //
   262  //	code-message line 1
   263  //	code-message line 2
   264  //	...
   265  //	code message line n
   266  //
   267  // where code is a three-digit status code. The first line starts with the
   268  // code and a hyphen. The response is terminated by a line that starts
   269  // with the same code followed by a space. Each line in message is
   270  // separated by a newline (\n).
   271  //
   272  // See page 36 of RFC 959 (https://www.ietf.org/rfc/rfc959.txt) for
   273  // details of another form of response accepted:
   274  //
   275  //	code-message line 1
   276  //	message line 2
   277  //	...
   278  //	code message line n
   279  //
   280  // If the prefix of the status does not match the digits in expectCode,
   281  // ReadResponse returns with err set to &Error{code, message}.
   282  // For example, if expectCode is 31, an error will be returned if
   283  // the status is not in the range [310,319].
   284  //
   285  // An expectCode <= 0 disables the check of the status code.
   286  func (r *Reader) ReadResponse(expectCode int) (code int, message string, err error) {
   287  	code, continued, message, err := r.readCodeLine(expectCode)
   288  	multi := continued
   289  	for continued {
   290  		line, err := r.ReadLine()
   291  		if err != nil {
   292  			return 0, "", err
   293  		}
   294  
   295  		var code2 int
   296  		var moreMessage string
   297  		code2, continued, moreMessage, err = parseCodeLine(line, 0)
   298  		if err != nil || code2 != code {
   299  			message += "\n" + strings.TrimRight(line, "\r\n")
   300  			continued = true
   301  			continue
   302  		}
   303  		message += "\n" + moreMessage
   304  	}
   305  	if err != nil && multi && message != "" {
   306  		// replace one line error message with all lines (full message)
   307  		err = &Error{code, message}
   308  	}
   309  	return
   310  }
   311  
   312  // DotReader returns a new [Reader] that satisfies Reads using the
   313  // decoded text of a dot-encoded block read from r.
   314  // The returned Reader is only valid until the next call
   315  // to a method on r.
   316  //
   317  // Dot encoding is a common framing used for data blocks
   318  // in text protocols such as SMTP.  The data consists of a sequence
   319  // of lines, each of which ends in "\r\n".  The sequence itself
   320  // ends at a line containing just a dot: ".\r\n".  Lines beginning
   321  // with a dot are escaped with an additional dot to avoid
   322  // looking like the end of the sequence.
   323  //
   324  // The decoded form returned by the Reader's Read method
   325  // rewrites the "\r\n" line endings into the simpler "\n",
   326  // removes leading dot escapes if present, and stops with error [io.EOF]
   327  // after consuming (and discarding) the end-of-sequence line.
   328  func (r *Reader) DotReader() io.Reader {
   329  	r.closeDot()
   330  	r.dot = &dotReader{r: r}
   331  	return r.dot
   332  }
   333  
// dotReader is the io.Reader returned by Reader.DotReader. It decodes a
// dot-encoded block from its parent Reader.
type dotReader struct {
	r     *Reader // parent Reader whose R supplies the encoded bytes
	state int     // current state of the decoding state machine in Read
}
   338  
// Read satisfies reads by decoding dot-encoded data read from d.r.
//
// It stores up to len(b) decoded bytes in b and returns the count. Once
// the end marker line has been consumed it returns io.EOF. If the
// underlying reader reports io.EOF first, Read returns
// io.ErrUnexpectedEOF. On any error (including io.EOF) d detaches itself
// from its parent Reader if it is still the current dot reader.
func (d *dotReader) Read(b []byte) (n int, err error) {
	// Run data through a simple state machine to
	// elide leading dots, rewrite trailing \r\n into \n,
	// and detect ending .\r\n line.
	const (
		stateBeginLine = iota // beginning of line; initial state; must be zero
		stateDot              // read . at beginning of line
		stateDotCR            // read .\r at beginning of line
		stateCR               // read \r (possibly at end of line)
		stateData             // reading data in middle of line
		stateEOF              // reached .\r\n end marker line
	)
	br := d.r.R
	for n < len(b) && d.state != stateEOF {
		var c byte
		c, err = br.ReadByte()
		if err != nil {
			// The block must end with the dot line, so running out of
			// input here is unexpected.
			if err == io.EOF {
				err = io.ErrUnexpectedEOF
			}
			break
		}
		// A "continue" below consumes c without emitting anything;
		// leaving the switch normally emits c into b.
		switch d.state {
		case stateBeginLine:
			if c == '.' {
				d.state = stateDot
				continue
			}
			if c == '\r' {
				d.state = stateCR
				continue
			}
			d.state = stateData

		case stateDot:
			if c == '\r' {
				d.state = stateDotCR
				continue
			}
			if c == '\n' {
				// A bare ".\n" is accepted as the end marker too.
				d.state = stateEOF
				continue
			}
			d.state = stateData

		case stateDotCR:
			if c == '\n' {
				d.state = stateEOF
				continue
			}
			// Not part of .\r\n.
			// Consume leading dot and emit saved \r.
			br.UnreadByte()
			c = '\r'
			d.state = stateData

		case stateCR:
			if c == '\n' {
				d.state = stateBeginLine
				// break leaves the switch only, so the '\n' is emitted below.
				break
			}
			// Not part of \r\n. Emit saved \r
			br.UnreadByte()
			c = '\r'
			d.state = stateData

		case stateData:
			if c == '\r' {
				d.state = stateCR
				continue
			}
			if c == '\n' {
				d.state = stateBeginLine
			}
		}
		b[n] = c
		n++
	}
	if err == nil && d.state == stateEOF {
		err = io.EOF
	}
	// Detach from the parent so that closeDot stops draining.
	if err != nil && d.r.dot == d {
		d.r.dot = nil
	}
	return
}
   426  
   427  // closeDot drains the current DotReader if any,
   428  // making sure that it reads until the ending dot line.
   429  func (r *Reader) closeDot() {
   430  	if r.dot == nil {
   431  		return
   432  	}
   433  	buf := make([]byte, 128)
   434  	for r.dot != nil {
   435  		// When Read reaches EOF or an error,
   436  		// it will set r.dot == nil.
   437  		r.dot.Read(buf)
   438  	}
   439  }
   440  
   441  // ReadDotBytes reads a dot-encoding and returns the decoded data.
   442  //
   443  // See the documentation for the [Reader.DotReader] method for details about dot-encoding.
   444  func (r *Reader) ReadDotBytes() ([]byte, error) {
   445  	return io.ReadAll(r.DotReader())
   446  }
   447  
   448  // ReadDotLines reads a dot-encoding and returns a slice
   449  // containing the decoded lines, with the final \r\n or \n elided from each.
   450  //
   451  // See the documentation for the [Reader.DotReader] method for details about dot-encoding.
   452  func (r *Reader) ReadDotLines() ([]string, error) {
   453  	// We could use ReadDotBytes and then Split it,
   454  	// but reading a line at a time avoids needing a
   455  	// large contiguous block of memory and is simpler.
   456  	var v []string
   457  	var err error
   458  	for {
   459  		var line string
   460  		line, err = r.ReadLine()
   461  		if err != nil {
   462  			if err == io.EOF {
   463  				err = io.ErrUnexpectedEOF
   464  			}
   465  			break
   466  		}
   467  
   468  		// Dot by itself marks end; otherwise cut one dot.
   469  		if len(line) > 0 && line[0] == '.' {
   470  			if len(line) == 1 {
   471  				break
   472  			}
   473  			line = line[1:]
   474  		}
   475  		v = append(v, line)
   476  	}
   477  	return v, err
   478  }
   479  
// colon is the separator readMIMEHeader passes to bytes.Cut to split a
// header line into key and value.
var colon = []byte(":")
   481  
   482  // ReadMIMEHeader reads a MIME-style header from r.
   483  // The header is a sequence of possibly continued Key: Value lines
   484  // ending in a blank line.
   485  // The returned map m maps [CanonicalMIMEHeaderKey](key) to a
   486  // sequence of values in the same order encountered in the input.
   487  //
   488  // For example, consider this input:
   489  //
   490  //	My-Key: Value 1
   491  //	Long-Key: Even
   492  //	       Longer Value
   493  //	My-Key: Value 2
   494  //
   495  // Given that input, ReadMIMEHeader returns the map:
   496  //
   497  //	map[string][]string{
   498  //		"My-Key": {"Value 1", "Value 2"},
   499  //		"Long-Key": {"Even Longer Value"},
   500  //	}
   501  func (r *Reader) ReadMIMEHeader() (MIMEHeader, error) {
   502  	return readMIMEHeader(r, math.MaxInt64, math.MaxInt64)
   503  }
   504  
// readMIMEHeader is accessed from mime/multipart.
//go:linkname readMIMEHeader

// readMIMEHeader is a version of ReadMIMEHeader which takes a limit on the header size.
// It is called by the mime/multipart package.
//
// maxMemory is a byte budget charged with a fixed overhead for the map,
// a per-key overhead plus the key length for each new key, and the length
// of every value. maxHeaders is the maximum number of header lines
// accepted. Exceeding either yields errMessageTooLarge. A malformed line
// yields a ProtocolError together with the headers parsed so far.
func readMIMEHeader(r *Reader, maxMemory, maxHeaders int64) (MIMEHeader, error) {
	// Avoid lots of small slice allocations later by allocating one
	// large one ahead of time which we'll cut up into smaller
	// slices. If this isn't big enough later, we allocate small ones.
	var strs []string
	hint := r.upcomingHeaderKeys()
	if hint > 0 {
		if hint > 1000 {
			hint = 1000 // set a cap to avoid overallocation
		}
		strs = make([]string, hint)
	}

	m := make(MIMEHeader, hint)

	// Account for 400 bytes of overhead for the MIMEHeader, plus 200 bytes per entry.
	// Benchmarking map creation as of go1.20, a one-entry MIMEHeader is 416 bytes and large
	// MIMEHeaders average about 200 bytes per entry.
	maxMemory -= 400
	const mapEntryOverhead = 200

	// The first line cannot start with a leading space.
	if buf, err := r.R.Peek(1); err == nil && (buf[0] == ' ' || buf[0] == '\t') {
		const errorLimit = 80 // arbitrary limit on how much of the line we'll quote
		line, err := r.readLineSlice(errorLimit)
		if err != nil {
			return m, err
		}
		return m, ProtocolError("malformed MIME header initial line: " + string(line))
	}

	for {
		kv, err := r.readContinuedLineSlice(maxMemory, mustHaveFieldNameColon)
		// An empty result is either the blank line that ends the header
		// (err == nil) or a failed read (err != nil).
		if len(kv) == 0 {
			return m, err
		}

		// Key ends at first colon.
		k, v, ok := bytes.Cut(kv, colon)
		if !ok {
			return m, ProtocolError("malformed MIME header line: " + string(kv))
		}
		key, ok := canonicalMIMEHeaderKey(k)
		if !ok {
			return m, ProtocolError("malformed MIME header line: " + string(kv))
		}
		for _, c := range v {
			if !validHeaderValueByte(c) {
				return m, ProtocolError("malformed MIME header line: " + string(kv))
			}
		}

		// Every accepted line counts against the header limit.
		maxHeaders--
		if maxHeaders < 0 {
			return nil, errMessageTooLarge
		}

		// Skip initial spaces in value.
		value := string(bytes.TrimLeft(v, " \t"))

		vv := m[key]
		if vv == nil {
			// First value for this key: charge for the key and its map entry.
			maxMemory -= int64(len(key))
			maxMemory -= mapEntryOverhead
		}
		maxMemory -= int64(len(value))
		if maxMemory < 0 {
			return m, errMessageTooLarge
		}
		if vv == nil && len(strs) > 0 {
			// More than likely this will be a single-element key.
			// Most headers aren't multi-valued.
			// Set the capacity on strs[0] to 1, so any future append
			// won't extend the slice into the other strings.
			vv, strs = strs[:1:1], strs[1:]
			vv[0] = value
			m[key] = vv
		} else {
			m[key] = append(vv, value)
		}

		// The line was stored; now report any error that came with it.
		if err != nil {
			return m, err
		}
	}
}
   596  
   597  // noValidation is a no-op validation func for readContinuedLineSlice
   598  // that permits any lines.
   599  func noValidation(_ []byte) error { return nil }
   600  
   601  // mustHaveFieldNameColon ensures that, per RFC 7230, the
   602  // field-name is on a single line, so the first line must
   603  // contain a colon.
   604  func mustHaveFieldNameColon(line []byte) error {
   605  	if bytes.IndexByte(line, ':') < 0 {
   606  		return ProtocolError(fmt.Sprintf("malformed MIME header: missing colon: %q", line))
   607  	}
   608  	return nil
   609  }
   610  
// nl is the line separator upcomingHeaderKeys passes to bytes.Cut when
// scanning buffered data.
var nl = []byte("\n")
   612  
   613  // upcomingHeaderKeys returns an approximation of the number of keys
   614  // that will be in this header. If it gets confused, it returns 0.
   615  func (r *Reader) upcomingHeaderKeys() (n int) {
   616  	// Try to determine the 'hint' size.
   617  	r.R.Peek(1) // force a buffer load if empty
   618  	s := r.R.Buffered()
   619  	if s == 0 {
   620  		return
   621  	}
   622  	peek, _ := r.R.Peek(s)
   623  	for len(peek) > 0 && n < 1000 {
   624  		var line []byte
   625  		line, peek, _ = bytes.Cut(peek, nl)
   626  		if len(line) == 0 || (len(line) == 1 && line[0] == '\r') {
   627  			// Blank line separating headers from the body.
   628  			break
   629  		}
   630  		if line[0] == ' ' || line[0] == '\t' {
   631  			// Folded continuation of the previous line.
   632  			continue
   633  		}
   634  		n++
   635  	}
   636  	return n
   637  }
   638  
   639  // CanonicalMIMEHeaderKey returns the canonical format of the
   640  // MIME header key s. The canonicalization converts the first
   641  // letter and any letter following a hyphen to upper case;
   642  // the rest are converted to lowercase. For example, the
   643  // canonical key for "accept-encoding" is "Accept-Encoding".
   644  // MIME header keys are assumed to be ASCII only.
   645  // If s contains a space or invalid header field bytes, it is
   646  // returned without modifications.
   647  func CanonicalMIMEHeaderKey(s string) string {
   648  	// Quick check for canonical encoding.
   649  	upper := true
   650  	for i := 0; i < len(s); i++ {
   651  		c := s[i]
   652  		if !validHeaderFieldByte(c) {
   653  			return s
   654  		}
   655  		if upper && 'a' <= c && c <= 'z' {
   656  			s, _ = canonicalMIMEHeaderKey([]byte(s))
   657  			return s
   658  		}
   659  		if !upper && 'A' <= c && c <= 'Z' {
   660  			s, _ = canonicalMIMEHeaderKey([]byte(s))
   661  			return s
   662  		}
   663  		upper = c == '-'
   664  	}
   665  	return s
   666  }
   667  
// toLower is the difference between an ASCII lower-case letter and its
// upper-case form; canonicalMIMEHeaderKey adds or subtracts it to change
// the case of a byte.
const toLower = 'a' - 'A'
   669  
   670  // validHeaderFieldByte reports whether c is a valid byte in a header
   671  // field name. RFC 7230 says:
   672  //
   673  //	header-field   = field-name ":" OWS field-value OWS
   674  //	field-name     = token
   675  //	tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
   676  //	        "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
   677  //	token = 1*tchar
   678  func validHeaderFieldByte(c byte) bool {
   679  	// mask is a 128-bit bitmap with 1s for allowed bytes,
   680  	// so that the byte c can be tested with a shift and an and.
   681  	// If c >= 128, then 1<<c and 1<<(c-64) will both be zero,
   682  	// and this function will return false.
   683  	const mask = 0 |
   684  		(1<<(10)-1)<<'0' |
   685  		(1<<(26)-1)<<'a' |
   686  		(1<<(26)-1)<<'A' |
   687  		1<<'!' |
   688  		1<<'#' |
   689  		1<<'$' |
   690  		1<<'%' |
   691  		1<<'&' |
   692  		1<<'\'' |
   693  		1<<'*' |
   694  		1<<'+' |
   695  		1<<'-' |
   696  		1<<'.' |
   697  		1<<'^' |
   698  		1<<'_' |
   699  		1<<'`' |
   700  		1<<'|' |
   701  		1<<'~'
   702  	return ((uint64(1)<<c)&(mask&(1<<64-1)) |
   703  		(uint64(1)<<(c-64))&(mask>>64)) != 0
   704  }
   705  
   706  // validHeaderValueByte reports whether c is a valid byte in a header
   707  // field value. RFC 7230 says:
   708  //
   709  //	field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
   710  //	field-vchar    = VCHAR / obs-text
   711  //	obs-text       = %x80-FF
   712  //
   713  // RFC 5234 says:
   714  //
   715  //	HTAB           =  %x09
   716  //	SP             =  %x20
   717  //	VCHAR          =  %x21-7E
   718  func validHeaderValueByte(c byte) bool {
   719  	// mask is a 128-bit bitmap with 1s for allowed bytes,
   720  	// so that the byte c can be tested with a shift and an and.
   721  	// If c >= 128, then 1<<c and 1<<(c-64) will both be zero.
   722  	// Since this is the obs-text range, we invert the mask to
   723  	// create a bitmap with 1s for disallowed bytes.
   724  	const mask = 0 |
   725  		(1<<(0x7f-0x21)-1)<<0x21 | // VCHAR: %x21-7E
   726  		1<<0x20 | // SP: %x20
   727  		1<<0x09 // HTAB: %x09
   728  	return ((uint64(1)<<c)&^(mask&(1<<64-1)) |
   729  		(uint64(1)<<(c-64))&^(mask>>64)) == 0
   730  }
   731  
   732  // canonicalMIMEHeaderKey is like CanonicalMIMEHeaderKey but is
   733  // allowed to mutate the provided byte slice before returning the
   734  // string.
   735  //
   736  // For invalid inputs (if a contains spaces or non-token bytes), a
   737  // is unchanged and a string copy is returned.
   738  //
   739  // ok is true if the header key contains only valid characters and spaces.
   740  // ReadMIMEHeader accepts header keys containing spaces, but does not
   741  // canonicalize them.
   742  func canonicalMIMEHeaderKey(a []byte) (_ string, ok bool) {
   743  	if len(a) == 0 {
   744  		return "", false
   745  	}
   746  
   747  	// See if a looks like a header key. If not, return it unchanged.
   748  	noCanon := false
   749  	for _, c := range a {
   750  		if validHeaderFieldByte(c) {
   751  			continue
   752  		}
   753  		// Don't canonicalize.
   754  		if c == ' ' {
   755  			// We accept invalid headers with a space before the
   756  			// colon, but must not canonicalize them.
   757  			// See https://go.dev/issue/34540.
   758  			noCanon = true
   759  			continue
   760  		}
   761  		return string(a), false
   762  	}
   763  	if noCanon {
   764  		return string(a), true
   765  	}
   766  
   767  	upper := true
   768  	for i, c := range a {
   769  		// Canonicalize: first letter upper case
   770  		// and upper case after each dash.
   771  		// (Host, User-Agent, If-Modified-Since).
   772  		// MIME headers are ASCII only, so no Unicode issues.
   773  		if upper && 'a' <= c && c <= 'z' {
   774  			c -= toLower
   775  		} else if !upper && 'A' <= c && c <= 'Z' {
   776  			c += toLower
   777  		}
   778  		a[i] = c
   779  		upper = c == '-' // for next time
   780  	}
   781  	commonHeaderOnce.Do(initCommonHeader)
   782  	// The compiler recognizes m[string(byteSlice)] as a special
   783  	// case, so a copy of a's bytes into a new string does not
   784  	// happen in this map lookup:
   785  	if v := commonHeader[string(a)]; v != "" {
   786  		return v, true
   787  	}
   788  	return string(a), true
   789  }
   790  
// commonHeader interns common header strings. It is filled in by
// initCommonHeader and maps each canonical key to itself.
var commonHeader map[string]string

// commonHeaderOnce ensures initCommonHeader runs at most once; see
// canonicalMIMEHeaderKey.
var commonHeaderOnce sync.Once
   795  
   796  func initCommonHeader() {
   797  	commonHeader = make(map[string]string)
   798  	for _, v := range []string{
   799  		"Accept",
   800  		"Accept-Charset",
   801  		"Accept-Encoding",
   802  		"Accept-Language",
   803  		"Accept-Ranges",
   804  		"Cache-Control",
   805  		"Cc",
   806  		"Connection",
   807  		"Content-Id",
   808  		"Content-Language",
   809  		"Content-Length",
   810  		"Content-Transfer-Encoding",
   811  		"Content-Type",
   812  		"Cookie",
   813  		"Date",
   814  		"Dkim-Signature",
   815  		"Etag",
   816  		"Expires",
   817  		"From",
   818  		"Host",
   819  		"If-Modified-Since",
   820  		"If-None-Match",
   821  		"In-Reply-To",
   822  		"Last-Modified",
   823  		"Location",
   824  		"Message-Id",
   825  		"Mime-Version",
   826  		"Pragma",
   827  		"Received",
   828  		"Return-Path",
   829  		"Server",
   830  		"Set-Cookie",
   831  		"Subject",
   832  		"To",
   833  		"User-Agent",
   834  		"Via",
   835  		"X-Forwarded-For",
   836  		"X-Imforwards",
   837  		"X-Powered-By",
   838  	} {
   839  		commonHeader[v] = v
   840  	}
   841  }
   842
View as plain text