From 26248678aafc2f8e277d4bdafc116f2b349b02c5 Mon Sep 17 00:00:00 2001 From: clsr Date: Fri, 18 Aug 2017 13:45:49 +0200 Subject: Initial commit --- cnmfmt/cnmfmt.go | 525 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 525 insertions(+) create mode 100644 cnmfmt/cnmfmt.go (limited to 'cnmfmt/cnmfmt.go') diff --git a/cnmfmt/cnmfmt.go b/cnmfmt/cnmfmt.go new file mode 100644 index 0000000..cb8dc64 --- /dev/null +++ b/cnmfmt/cnmfmt.go @@ -0,0 +1,525 @@ +// Package cnmfmt provides parsing and composition for CNMfmt formatting. +package cnmfmt // import "contnet.org/lib/cnm-go/cnmfmt" + +import ( + "bytes" + "io" + "strings" + + "contnet.org/lib/cnm-go" +) + +func init() { + cnm.RegisterTextContentParser("fmt", parseTextFmt) +} + +// Text represents a paragraph of CNMfmt text. +type Text struct { + // Spans are spans of formatted text. + Spans []Span +} + +// ParseParagraph parses a single CNMfmt text paragraph s. +func ParseParagraph(s string) Text { + s = cnm.CollapseWhitespace(s) + + t := Text{} + var buf bytes.Buffer + format := Format{} + last := rune(-1) + url := false + + for _, r := range s { + if url && format.Link == "" { // need URL for link + if handleURL(r, &last, &format, &buf) { + continue + } + } + + switch r { + case '*', '/', '_', '`', '@': + handleTag(r, &last, &t, &format, &buf, &url) + + case '\\': + if last == '\\' { + buf.WriteString("\\\\") + last = -1 + } else { + if last >= 0 { + buf.WriteRune(last) + } + last = '\\' + } + + default: + if last >= 0 { + buf.WriteRune(last) + } + buf.WriteRune(r) + last = -1 + } + } + + if url && format.Link == "" { + if last >= 0 { + buf.WriteRune(last) + } + format.Link = Unescape(buf.String()) + buf.Reset() + } else if last >= 0 { + buf.WriteRune(last) + } + last = -1 + handleTag(-1, &last, &t, &format, &buf, &url) + + t.trimUnescape() + + return t +} + +func (t *Text) trimUnescape() { + var spans []Span + + for _, span := range t.Spans { + if span.Text != "" || span.Format.Link != "" { + spans = append(spans, span) + } + } + t.Spans, spans = spans, nil + + for i := len(t.Spans) - 1; i >= 0; i-- { + span := t.Spans[i] + if span.Text != "" || span.Format.Link != "" { + spans = append(spans, span) + } + } + for i := 0; i < len(spans)/2; i++ { + spans[i], spans[len(spans)-1-i] = spans[len(spans)-1-i], spans[i] + } + t.Spans = spans + + for i := range t.Spans { + t.Spans[i].Text = Unescape(t.Spans[i].Text) + } +} + +func (t *Text) appendSpan(format Format, txt string) { + if txt != "" || format.Link != "" { + t.Spans = append(t.Spans, Span{format, txt}) + } +} + +func handleURL(r rune, last *rune, format *Format, buf *bytes.Buffer) bool { + if r == '@' && *last == '@' { // end without text + format.Link = Unescape(buf.String()) + buf.Reset() + return false + } else if *last == '\\' { + buf.WriteByte('\\') + buf.WriteRune(r) + *last = -1 + } else if r == '\\' || r == '@' { + *last = r + } else if r != ' ' { // url + buf.WriteRune(r) + } else if buf.Len() > 0 { // space, then text + format.Link = Unescape(buf.String()) + buf.Reset() + } // else: prefix space + return true +} + +func handleTag(r rune, last *rune, txt *Text, format *Format, buf *bytes.Buffer, url *bool) { + if *last == '\\' { + buf.WriteRune(r) + *last = -1 + } else if *last == r { + txt.appendSpan(*format, buf.String()) + buf.Reset() + switch r { + case '*': + format.Bold = !format.Bold + case '/': + format.Italic = !format.Italic + case '_': + format.Underline = !format.Underline + case '`': + format.Monospace = !format.Monospace + case '@': + format.Link = "" + *url = !*url + } + *last = -1 + } else { + switch *last { + case '*', '/', '_', '`', '@': + buf.WriteRune(*last) + } + *last = r + } +} + +// WriteIndent writes the formatted text indented by n tabs. +func (t Text) WriteIndent(w io.Writer, n int) error { + var state [5]byte // bold, italic, underline, monospace, link + si := 0 + format := Format{} + spans := EscapeSpans(t.Spans) + var line []string + for _, span := range spans { + order := tagOrder(state[:si], format, span.Format) + for _, f := range order { + switch f { + case '*': + format.Bold = !format.Bold + line = append(line, "**") + case '/': + format.Italic = !format.Italic + line = append(line, "//") + case '_': + format.Underline = !format.Underline + line = append(line, "__") + case '`': + format.Monospace = !format.Monospace + line = append(line, "``") + case '@': + if format.Link != "" { + line = append(line, "@@") + } + if span.Format.Link != "" { + pad := "" + if span.Text != "" { + pad = " " + } + line = append(line, "@@", cnm.Escape(span.Format.Link), pad) + } + } + } + line = append(line, span.Text) + si = cleanupTags(state[:], order, span.Format) + format = span.Format + } + return writeIndent(w, strings.Join(line, ""), n) +} + +func tagOrder(state []byte, old, new Format) []byte { + ldiff := "" + if old.Link != new.Link { + ldiff = "1" + } + diff := Format{ + Bold: old.Bold != new.Bold, + Italic: old.Italic != new.Italic, + Underline: old.Underline != new.Underline, + Monospace: old.Monospace != new.Monospace, + Link: ldiff, + } + + var order [5]byte + oi := 0 + for i := len(state) - 1; i >= 0; i-- { + switch state[i] { + case '*': + if diff.Bold { + order[oi] = '*' + oi++ + diff.Bold = false + } + case '/': + if diff.Italic { + order[oi] = '/' + oi++ + diff.Italic = false + } + case '_': + if diff.Underline { + order[oi] = '_' + oi++ + diff.Underline = false + } + case '`': + if diff.Monospace { + order[oi] = '`' + oi++ + diff.Monospace = false + } + case '@': + if diff.Link != "" { + order[oi] = '@' + oi++ + diff.Link = "" + } + } + } + + if diff.Bold { + order[oi] = '*' + oi++ + } + if diff.Italic { + order[oi] = '/' + oi++ + } + if diff.Underline { + order[oi] = '_' + oi++ + } + if diff.Monospace { + order[oi] = '`' + oi++ + } + if diff.Link != "" { + order[oi] = '@' + oi++ + } + + return order[:oi] +} + +func cleanupTags(state []byte, order []byte, format Format) int { + var newState [10]byte + copy(newState[:5], state) + copy(newState[5:], order) + for i := range newState { + switch newState[i] { + case '*': + if !format.Bold { + newState[i] = 0 + } + case '/': + if !format.Italic { + newState[i] = 0 + } + case '_': + if !format.Underline { + newState[i] = 0 + } + case '`': + if !format.Monospace { + newState[i] = 0 + } + case '@': + if format.Link == "" { + newState[i] = 0 + } + } + } + si := 0 + for _, f := range newState { + if f > 0 { + state[si] = f + si++ + } + } + return si +} + +// Span represents a span of text with a format. +type Span struct { + // Format is the format of the text. + Format Format + + // Text is the text content of the span. + Text string +} + +// Format represents a state of CNMfmt formatting. +type Format struct { + // Bold text. + Bold bool + + // Italic text. + Italic bool + + // Underlined text. + Underline bool + + // Monospaced text. + Monospace bool + + // Hyperlink URL (if non-empty). + Link string +} + +// Escape escapes CNMfmt and CNM text special characters. +func Escape(s string) string { + return EscapeFmt(cnm.Escape(s)) +} + +// EscapeSpans escapes CNMfmt and CNM text within spans. +// +// This function will not needlessly escape spaces at the start or end of a +// span if the sibling span contains nonspaces. +func EscapeSpans(spans []Span) []Span { + // XXX: this is an ugly solution + esc := make([]Span, len(spans)) + for i := range spans { + start := false + end := false + span := spans[i] + if i+1 < len(spans) { + s := spans[i+1].Text + if len(s) > 0 && s[0] != ' ' { + span.Text = span.Text + "x" + end = true + } + } + if i > 0 { + s := spans[i-1].Text + if len(s) > 0 && s[len(s)-1] != ' ' { + span.Text = "x" + span.Text + start = true + } + } + span.Text = Escape(span.Text) + if start { + span.Text = span.Text[1:] + } + if end { + span.Text = span.Text[:len(span.Text)-1] + } + esc[i] = span + } + return esc +} + +var escapeReplacer = strings.NewReplacer( + `*`, `\*`, + `/`, `\/`, + `_`, `\_`, + "`", "\\`", + `@`, `\@`, +) + +// EscapeFmt escapes only CNMfmt format toggle characters. +func EscapeFmt(s string) string { + return escapeReplacer.Replace(s) +} + +// Unescape resolves CNM text and CNMfmt escape sequences in s. +func Unescape(s string) string { + return cnm.Unescape(UnescapeFmt(s)) +} + +var unescapeReplacer = strings.NewReplacer( + `\\`, `\\`, + `\*`, `*`, + `\/`, `/`, + `\_`, `_`, + "\\`", "`", + `\@`, `@`, +) + +// UnescapeFmt resolves only CNMfmt escape sequences in s. +func UnescapeFmt(s string) string { + return unescapeReplacer.Replace(s) +} + +// TextFmtContents represents CNM `text fmt` contents. +type TextFmtContents struct { + Paragraphs []Text +} + +// NewTextFmtBlock creates a new `text fmt` block containing provided CNMfmt +// paragraphs. +func NewTextFmtBlock(paragraphs []Text) *cnm.TextBlock { + return cnm.NewTextBlock("fmt", TextFmtContents{paragraphs}) +} + +// WriteIndent writes the formatted text contents indented by n tabs. +func (tf TextFmtContents) WriteIndent(w io.Writer, n int) error { + for i, p := range tf.Paragraphs { + if i != 0 { + if err := writeIndent(w, "", 0); err != nil { + return err + } + } + if err := p.WriteIndent(w, n); err != nil { + return err + } + } + return nil +} + +// Parse parses paragraphs of CNMfmt text. +func Parse(paragraphs string) []Text { + var txt []Text + var paragraph []string + + for _, line := range strings.Split(paragraphs, "\n") { + end := false + if line != "" { + if strings.Trim(line, "\n\r\t\f ") == "" { + end = true + } else { + paragraph = append(paragraph, line) + } + } else if len(paragraph) > 0 { + end = true + } + if end { + txt = append(txt, ParseParagraph(strings.Join(paragraph, " "))) + paragraph = nil + } + } + if len(paragraph) > 0 { + txt = append(txt, ParseParagraph(strings.Join(paragraph, " "))) + } + + return txt +} + +func writeIndent(w io.Writer, s string, depth int) error { + const tabs = "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" + + if s == "" { + _, err := w.Write([]byte{'\n'}) + return err + } + if depth == 0 { + _, err := w.Write([]byte(s + "\n")) + return err + } + + var ind string + if depth <= len(tabs) { + ind = tabs[:depth] + } else { + ind = strings.Repeat("\t", depth) + } + _, err := w.Write([]byte(ind + s + "\n")) + return err + +} + +func parseTextFmt(p *cnm.Parser, block *cnm.TokenBlock) (cnm.TextContents, error) { + txt := TextFmtContents{} + var paragraph []string + var err error + for err == nil { + if !p.Empty() && p.Indent() <= block.Indent() { + break + } + + token := p.RawText() + end := false + if text, ok := token.(*cnm.TokenRawText); ok { + if strings.Trim(text.Text, "\n\r\t\f ") == "" { + end = true + } else { + paragraph = append(paragraph, text.Text) + } + } else if _, ok := token.(*cnm.TokenEmptyLine); ok && len(paragraph) > 0 { + end = true + } + if end { + txt.Paragraphs = append(txt.Paragraphs, ParseParagraph(strings.Join(paragraph, " "))) + paragraph = nil + } + err = p.Next() + } + if len(paragraph) > 0 { + txt.Paragraphs = append(txt.Paragraphs, ParseParagraph(strings.Join(paragraph, " "))) + } + return txt, err +} -- cgit