// Copyright 2022 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package comment import ( "bytes" "fmt" "sort" "strings" "unicode/utf8" ) // A textPrinter holds the state needed for printing a Doc as plain text. type textPrinter struct { *Printer long strings.Builder prefix string codePrefix string width int } // Text returns a textual formatting of the [Doc]. // See the [Printer] documentation for ways to customize the text output. func (p *Printer) Text(d *Doc) []byte { tp := &textPrinter{ Printer: p, prefix: p.TextPrefix, codePrefix: p.TextCodePrefix, width: p.TextWidth, } if tp.codePrefix == "" { tp.codePrefix = p.TextPrefix + "\t" } if tp.width == 0 { tp.width = 80 - utf8.RuneCountInString(tp.prefix) } var out bytes.Buffer for i, x := range d.Content { if i > 0 && blankBefore(x) { out.WriteString(tp.prefix) writeNL(&out) } tp.block(&out, x) } anyUsed := false for _, def := range d.Links { if def.Used { anyUsed = true break } } if anyUsed { writeNL(&out) for _, def := range d.Links { if def.Used { fmt.Fprintf(&out, "[%s]: %s\n", def.Text, def.URL) } } } return out.Bytes() } // writeNL calls out.WriteByte('\n') // but first trims trailing spaces on the previous line. func writeNL(out *bytes.Buffer) { // Trim trailing spaces. data := out.Bytes() n := 0 for n < len(data) && (data[len(data)-n-1] == ' ' || data[len(data)-n-1] == '\t') { n++ } if n > 0 { out.Truncate(len(data) - n) } out.WriteByte('\n') } // block prints the block x to out. func (p *textPrinter) block(out *bytes.Buffer, x Block) { switch x := x.(type) { default: fmt.Fprintf(out, "?%T\n", x) case *Paragraph: out.WriteString(p.prefix) p.text(out, "", x.Text) case *Heading: out.WriteString(p.prefix) out.WriteString("# ") p.text(out, "", x.Text) case *Code: text := x.Text for text != "" { var line string line, text, _ = strings.Cut(text, "\n") if line != "" { out.WriteString(p.codePrefix) out.WriteString(line) } writeNL(out) } case *List: loose := x.BlankBetween() for i, item := range x.Items { if i > 0 && loose { out.WriteString(p.prefix) writeNL(out) } out.WriteString(p.prefix) out.WriteString(" ") if item.Number == "" { out.WriteString(" - ") } else { out.WriteString(item.Number) out.WriteString(". ") } for i, blk := range item.Content { const fourSpace = " " if i > 0 { writeNL(out) out.WriteString(p.prefix) out.WriteString(fourSpace) } p.text(out, fourSpace, blk.(*Paragraph).Text) } } } } // text prints the text sequence x to out. func (p *textPrinter) text(out *bytes.Buffer, indent string, x []Text) { p.oneLongLine(&p.long, x) words := strings.Fields(p.long.String()) p.long.Reset() var seq []int if p.width < 0 || len(words) == 0 { seq = []int{0, len(words)} // one long line } else { seq = wrap(words, p.width-utf8.RuneCountInString(indent)) } for i := 0; i+1 < len(seq); i++ { if i > 0 { out.WriteString(p.prefix) out.WriteString(indent) } for j, w := range words[seq[i]:seq[i+1]] { if j > 0 { out.WriteString(" ") } out.WriteString(w) } writeNL(out) } } // oneLongLine prints the text sequence x to out as one long line, // without worrying about line wrapping. // Explicit links have the [ ] dropped to improve readability. func (p *textPrinter) oneLongLine(out *strings.Builder, x []Text) { for _, t := range x { switch t := t.(type) { case Plain: out.WriteString(string(t)) case Italic: out.WriteString(string(t)) case *Link: p.oneLongLine(out, t.Text) case *DocLink: p.oneLongLine(out, t.Text) } } } // wrap wraps words into lines of at most max runes, // minimizing the sum of the squares of the leftover lengths // at the end of each line (except the last, of course), // with a preference for ending lines at punctuation (.,:;). // // The returned slice gives the indexes of the first words // on each line in the wrapped text with a final entry of len(words). // Thus the lines are words[seq[0]:seq[1]], words[seq[1]:seq[2]], // ..., words[seq[len(seq)-2]:seq[len(seq)-1]]. // // The implementation runs in O(n log n) time, where n = len(words), // using the algorithm described in D. S. Hirschberg and L. L. Larmore, // “[The least weight subsequence problem],” FOCS 1985, pp. 137-143. // // [The least weight subsequence problem]: https://doi.org/10.1109/SFCS.1985.60 func wrap(words []string, max int) (seq []int) { // The algorithm requires that our scoring function be concave, // meaning that for all i₀ ≤ i₁ < j₀ ≤ j₁, // weight(i₀, j₀) + weight(i₁, j₁) ≤ weight(i₀, j₁) + weight(i₁, j₀). // // Our weights are two-element pairs [hi, lo] // ordered by elementwise comparison. // The hi entry counts the weight for lines that are longer than max, // and the lo entry counts the weight for lines that are not. // This forces the algorithm to first minimize the number of lines // that are longer than max, which correspond to lines with // single very long words. Having done that, it can move on to // minimizing the lo score, which is more interesting. // // The lo score is the sum for each line of the square of the // number of spaces remaining at the end of the line and a // penalty of 64 given out for not ending the line in a // punctuation character (.,:;). // The penalty is somewhat arbitrarily chosen by trying // different amounts and judging how nice the wrapped text looks. // Roughly speaking, using 64 means that we are willing to // end a line with eight blank spaces in order to end at a // punctuation character, even if the next word would fit in // those spaces. // // We care about ending in punctuation characters because // it makes the text easier to skim if not too many sentences // or phrases begin with a single word on the previous line. // A score is the score (also called weight) for a given line. // add and cmp add and compare scores. type score struct { hi int64 lo int64 } add := func(s, t score) score { return score{s.hi + t.hi, s.lo + t.lo} } cmp := func(s, t score) int { switch { case s.hi < t.hi: return -1 case s.hi > t.hi: return +1 case s.lo < t.lo: return -1 case s.lo > t.lo: return +1 } return 0 } // total[j] is the total number of runes // (including separating spaces) in words[:j]. total := make([]int, len(words)+1) total[0] = 0 for i, s := range words { total[1+i] = total[i] + utf8.RuneCountInString(s) + 1 } // weight returns weight(i, j). weight := func(i, j int) score { // On the last line, there is zero weight for being too short. n := total[j] - 1 - total[i] if j == len(words) && n <= max { return score{0, 0} } // Otherwise the weight is the penalty plus the square of the number of // characters remaining on the line or by which the line goes over. // In the latter case, that value goes in the hi part of the score. // (See note above.) p := wrapPenalty(words[j-1]) v := int64(max-n) * int64(max-n) if n > max { return score{v, p} } return score{0, v + p} } // The rest of this function is “The Basic Algorithm” from // Hirschberg and Larmore's conference paper, // using the same names as in the paper. f := []score{{0, 0}} g := func(i, j int) score { return add(f[i], weight(i, j)) } bridge := func(a, b, c int) bool { k := c + sort.Search(len(words)+1-c, func(k int) bool { k += c return cmp(g(a, k), g(b, k)) > 0 }) if k > len(words) { return true } return cmp(g(c, k), g(b, k)) <= 0 } // d is a one-ended deque implemented as a slice. d := make([]int, 1, len(words)) d[0] = 0 bestleft := make([]int, 1, len(words)) bestleft[0] = -1 for m := 1; m < len(words); m++ { f = append(f, g(d[0], m)) bestleft = append(bestleft, d[0]) for len(d) > 1 && cmp(g(d[1], m+1), g(d[0], m+1)) <= 0 { d = d[1:] // “Retire” } for len(d) > 1 && bridge(d[len(d)-2], d[len(d)-1], m) { d = d[:len(d)-1] // “Fire” } if cmp(g(m, len(words)), g(d[len(d)-1], len(words))) < 0 { d = append(d, m) // “Hire” // The next few lines are not in the paper but are necessary // to handle two-word inputs correctly. It appears to be // just a bug in the paper's pseudocode. if len(d) == 2 && cmp(g(d[1], m+1), g(d[0], m+1)) <= 0 { d = d[1:] } } } bestleft = append(bestleft, d[0]) // Recover least weight sequence from bestleft. n := 1 for m := len(words); m > 0; m = bestleft[m] { n++ } seq = make([]int, n) for m := len(words); m > 0; m = bestleft[m] { n-- seq[n] = m } return seq } // wrapPenalty is the penalty for inserting a line break after word s. func wrapPenalty(s string) int64 { switch s[len(s)-1] { case '.', ',', ':', ';': return 0 } return 64 }