text.go

Documentation: go/doc/comment

     1  // Copyright 2022 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package comment
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"sort"
    11  	"strings"
    12  	"unicode/utf8"
    13  )
    14  
    15  // A textPrinter holds the state needed for printing a Doc as plain text.
    16  type textPrinter struct {
    17  	*Printer
    18  	long       strings.Builder
    19  	prefix     string
    20  	codePrefix string
    21  	width      int
    22  }
    23  
    24  // Text returns a textual formatting of the [Doc].
    25  // See the [Printer] documentation for ways to customize the text output.
    26  func (p *Printer) Text(d *Doc) []byte {
    27  	tp := &textPrinter{
    28  		Printer:    p,
    29  		prefix:     p.TextPrefix,
    30  		codePrefix: p.TextCodePrefix,
    31  		width:      p.TextWidth,
    32  	}
    33  	if tp.codePrefix == "" {
    34  		tp.codePrefix = p.TextPrefix + "\t"
    35  	}
    36  	if tp.width == 0 {
    37  		tp.width = 80 - utf8.RuneCountInString(tp.prefix)
    38  	}
    39  
    40  	var out bytes.Buffer
    41  	for i, x := range d.Content {
    42  		if i > 0 && blankBefore(x) {
    43  			out.WriteString(tp.prefix)
    44  			writeNL(&out)
    45  		}
    46  		tp.block(&out, x)
    47  	}
    48  	anyUsed := false
    49  	for _, def := range d.Links {
    50  		if def.Used {
    51  			anyUsed = true
    52  			break
    53  		}
    54  	}
    55  	if anyUsed {
    56  		writeNL(&out)
    57  		for _, def := range d.Links {
    58  			if def.Used {
    59  				fmt.Fprintf(&out, "[%s]: %s\n", def.Text, def.URL)
    60  			}
    61  		}
    62  	}
    63  	return out.Bytes()
    64  }
    65  
    66  // writeNL calls out.WriteByte('\n')
    67  // but first trims trailing spaces on the previous line.
    68  func writeNL(out *bytes.Buffer) {
    69  	// Trim trailing spaces.
    70  	data := out.Bytes()
    71  	n := 0
    72  	for n < len(data) && (data[len(data)-n-1] == ' ' || data[len(data)-n-1] == '\t') {
    73  		n++
    74  	}
    75  	if n > 0 {
    76  		out.Truncate(len(data) - n)
    77  	}
    78  	out.WriteByte('\n')
    79  }
    80  
    81  // block prints the block x to out.
    82  func (p *textPrinter) block(out *bytes.Buffer, x Block) {
    83  	switch x := x.(type) {
    84  	default:
    85  		fmt.Fprintf(out, "?%T\n", x)
    86  
    87  	case *Paragraph:
    88  		out.WriteString(p.prefix)
    89  		p.text(out, "", x.Text)
    90  
    91  	case *Heading:
    92  		out.WriteString(p.prefix)
    93  		out.WriteString("# ")
    94  		p.text(out, "", x.Text)
    95  
    96  	case *Code:
    97  		text := x.Text
    98  		for text != "" {
    99  			var line string
   100  			line, text, _ = strings.Cut(text, "\n")
   101  			if line != "" {
   102  				out.WriteString(p.codePrefix)
   103  				out.WriteString(line)
   104  			}
   105  			writeNL(out)
   106  		}
   107  
   108  	case *List:
   109  		loose := x.BlankBetween()
   110  		for i, item := range x.Items {
   111  			if i > 0 && loose {
   112  				out.WriteString(p.prefix)
   113  				writeNL(out)
   114  			}
   115  			out.WriteString(p.prefix)
   116  			out.WriteString(" ")
   117  			if item.Number == "" {
   118  				out.WriteString(" - ")
   119  			} else {
   120  				out.WriteString(item.Number)
   121  				out.WriteString(". ")
   122  			}
   123  			for i, blk := range item.Content {
   124  				const fourSpace = "    "
   125  				if i > 0 {
   126  					writeNL(out)
   127  					out.WriteString(p.prefix)
   128  					out.WriteString(fourSpace)
   129  				}
   130  				p.text(out, fourSpace, blk.(*Paragraph).Text)
   131  			}
   132  		}
   133  	}
   134  }
   135  
   136  // text prints the text sequence x to out.
   137  func (p *textPrinter) text(out *bytes.Buffer, indent string, x []Text) {
   138  	p.oneLongLine(&p.long, x)
   139  	words := strings.Fields(p.long.String())
   140  	p.long.Reset()
   141  
   142  	var seq []int
   143  	if p.width < 0 || len(words) == 0 {
   144  		seq = []int{0, len(words)} // one long line
   145  	} else {
   146  		seq = wrap(words, p.width-utf8.RuneCountInString(indent))
   147  	}
   148  	for i := 0; i+1 < len(seq); i++ {
   149  		if i > 0 {
   150  			out.WriteString(p.prefix)
   151  			out.WriteString(indent)
   152  		}
   153  		for j, w := range words[seq[i]:seq[i+1]] {
   154  			if j > 0 {
   155  				out.WriteString(" ")
   156  			}
   157  			out.WriteString(w)
   158  		}
   159  		writeNL(out)
   160  	}
   161  }
   162  
   163  // oneLongLine prints the text sequence x to out as one long line,
   164  // without worrying about line wrapping.
   165  // Explicit links have the [ ] dropped to improve readability.
   166  func (p *textPrinter) oneLongLine(out *strings.Builder, x []Text) {
   167  	for _, t := range x {
   168  		switch t := t.(type) {
   169  		case Plain:
   170  			out.WriteString(string(t))
   171  		case Italic:
   172  			out.WriteString(string(t))
   173  		case *Link:
   174  			p.oneLongLine(out, t.Text)
   175  		case *DocLink:
   176  			p.oneLongLine(out, t.Text)
   177  		}
   178  	}
   179  }
   180  
   181  // wrap wraps words into lines of at most max runes,
   182  // minimizing the sum of the squares of the leftover lengths
   183  // at the end of each line (except the last, of course),
   184  // with a preference for ending lines at punctuation (.,:;).
   185  //
   186  // The returned slice gives the indexes of the first words
   187  // on each line in the wrapped text with a final entry of len(words).
   188  // Thus the lines are words[seq[0]:seq[1]], words[seq[1]:seq[2]],
   189  // ..., words[seq[len(seq)-2]:seq[len(seq)-1]].
   190  //
   191  // The implementation runs in O(n log n) time, where n = len(words),
   192  // using the algorithm described in D. S. Hirschberg and L. L. Larmore,
   193  // “[The least weight subsequence problem],” FOCS 1985, pp. 137-143.
   194  //
   195  // [The least weight subsequence problem]: https://doi.org/10.1109/SFCS.1985.60
   196  func wrap(words []string, max int) (seq []int) {
   197  	// The algorithm requires that our scoring function be concave,
   198  	// meaning that for all i₀ ≤ i₁ < j₀ ≤ j₁,
   199  	// weight(i₀, j₀) + weight(i₁, j₁) ≤ weight(i₀, j₁) + weight(i₁, j₀).
   200  	//
   201  	// Our weights are two-element pairs [hi, lo]
   202  	// ordered by elementwise comparison.
   203  	// The hi entry counts the weight for lines that are longer than max,
   204  	// and the lo entry counts the weight for lines that are not.
   205  	// This forces the algorithm to first minimize the number of lines
   206  	// that are longer than max, which correspond to lines with
   207  	// single very long words. Having done that, it can move on to
   208  	// minimizing the lo score, which is more interesting.
   209  	//
   210  	// The lo score is the sum for each line of the square of the
   211  	// number of spaces remaining at the end of the line and a
   212  	// penalty of 64 given out for not ending the line in a
   213  	// punctuation character (.,:;).
   214  	// The penalty is somewhat arbitrarily chosen by trying
   215  	// different amounts and judging how nice the wrapped text looks.
   216  	// Roughly speaking, using 64 means that we are willing to
   217  	// end a line with eight blank spaces in order to end at a
   218  	// punctuation character, even if the next word would fit in
   219  	// those spaces.
   220  	//
   221  	// We care about ending in punctuation characters because
   222  	// it makes the text easier to skim if not too many sentences
   223  	// or phrases begin with a single word on the previous line.
   224  
   225  	// A score is the score (also called weight) for a given line.
   226  	// add and cmp add and compare scores.
   227  	type score struct {
   228  		hi int64
   229  		lo int64
   230  	}
   231  	add := func(s, t score) score { return score{s.hi + t.hi, s.lo + t.lo} }
   232  	cmp := func(s, t score) int {
   233  		switch {
   234  		case s.hi < t.hi:
   235  			return -1
   236  		case s.hi > t.hi:
   237  			return +1
   238  		case s.lo < t.lo:
   239  			return -1
   240  		case s.lo > t.lo:
   241  			return +1
   242  		}
   243  		return 0
   244  	}
   245  
   246  	// total[j] is the total number of runes
   247  	// (including separating spaces) in words[:j].
   248  	total := make([]int, len(words)+1)
   249  	total[0] = 0
   250  	for i, s := range words {
   251  		total[1+i] = total[i] + utf8.RuneCountInString(s) + 1
   252  	}
   253  
   254  	// weight returns weight(i, j).
   255  	weight := func(i, j int) score {
   256  		// On the last line, there is zero weight for being too short.
   257  		n := total[j] - 1 - total[i]
   258  		if j == len(words) && n <= max {
   259  			return score{0, 0}
   260  		}
   261  
   262  		// Otherwise the weight is the penalty plus the square of the number of
   263  		// characters remaining on the line or by which the line goes over.
   264  		// In the latter case, that value goes in the hi part of the score.
   265  		// (See note above.)
   266  		p := wrapPenalty(words[j-1])
   267  		v := int64(max-n) * int64(max-n)
   268  		if n > max {
   269  			return score{v, p}
   270  		}
   271  		return score{0, v + p}
   272  	}
   273  
   274  	// The rest of this function is “The Basic Algorithm” from
   275  	// Hirschberg and Larmore's conference paper,
   276  	// using the same names as in the paper.
   277  	f := []score{{0, 0}}
   278  	g := func(i, j int) score { return add(f[i], weight(i, j)) }
   279  
   280  	bridge := func(a, b, c int) bool {
   281  		k := c + sort.Search(len(words)+1-c, func(k int) bool {
   282  			k += c
   283  			return cmp(g(a, k), g(b, k)) > 0
   284  		})
   285  		if k > len(words) {
   286  			return true
   287  		}
   288  		return cmp(g(c, k), g(b, k)) <= 0
   289  	}
   290  
   291  	// d is a one-ended deque implemented as a slice.
   292  	d := make([]int, 1, len(words))
   293  	d[0] = 0
   294  	bestleft := make([]int, 1, len(words))
   295  	bestleft[0] = -1
   296  	for m := 1; m < len(words); m++ {
   297  		f = append(f, g(d[0], m))
   298  		bestleft = append(bestleft, d[0])
   299  		for len(d) > 1 && cmp(g(d[1], m+1), g(d[0], m+1)) <= 0 {
   300  			d = d[1:] // “Retire”
   301  		}
   302  		for len(d) > 1 && bridge(d[len(d)-2], d[len(d)-1], m) {
   303  			d = d[:len(d)-1] // “Fire”
   304  		}
   305  		if cmp(g(m, len(words)), g(d[len(d)-1], len(words))) < 0 {
   306  			d = append(d, m) // “Hire”
   307  			// The next few lines are not in the paper but are necessary
   308  			// to handle two-word inputs correctly. It appears to be
   309  			// just a bug in the paper's pseudocode.
   310  			if len(d) == 2 && cmp(g(d[1], m+1), g(d[0], m+1)) <= 0 {
   311  				d = d[1:]
   312  			}
   313  		}
   314  	}
   315  	bestleft = append(bestleft, d[0])
   316  
   317  	// Recover least weight sequence from bestleft.
   318  	n := 1
   319  	for m := len(words); m > 0; m = bestleft[m] {
   320  		n++
   321  	}
   322  	seq = make([]int, n)
   323  	for m := len(words); m > 0; m = bestleft[m] {
   324  		n--
   325  		seq[n] = m
   326  	}
   327  	return seq
   328  }
   329  
   330  // wrapPenalty is the penalty for inserting a line break after word s.
   331  func wrapPenalty(s string) int64 {
   332  	switch s[len(s)-1] {
   333  	case '.', ',', ':', ';':
   334  		return 0
   335  	}
   336  	return 64
   337  }
   338
View as plain text