From 26248678aafc2f8e277d4bdafc116f2b349b02c5 Mon Sep 17 00:00:00 2001 From: clsr Date: Fri, 18 Aug 2017 13:45:49 +0200 Subject: Initial commit --- document.go | 278 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 278 insertions(+) create mode 100644 document.go (limited to 'document.go') diff --git a/document.go b/document.go new file mode 100644 index 0000000..b5cdbe5 --- /dev/null +++ b/document.go @@ -0,0 +1,278 @@ +// Package cnm implements CNM document parsing and composition. +package cnm // import "contnet.org/lib/cnm-go" + +import ( + "bufio" + "io" + "path" + "strings" +) + +// Document represents a CNM document. +type Document struct { + // Title is the document title (top-level "title" block). + Title string + + // Links is a list of document-level hyperlinks (top-level "links" block). + Links []Link + + // Site is a sitemap (top-level "site" block). + Site Site + + // Content is the document content (top-level "content" block). + Content *ContentBlock +} + +// ParseDocument parses a CNM document from r. +func ParseDocument(r io.Reader) (doc *Document, err error) { + p := NewParser(r) + doc = &Document{} + err = p.Next() + for err == nil { + token := p.Block() + if err = p.Next(); err != nil { + break + } + if blk, ok := token.(*TokenBlock); ok { + switch blk.Name { + case "title": + err = doc.parseTitle(p, blk) + case "links": + err = doc.parseLinks(p, blk) + case "site": + err = doc.Site.parse(p, blk) + case "content": + if doc.Content == nil { + doc.Content = &ContentBlock{name: "content"} + } + err = doc.Content.parse(p, blk) + default: + // discard lines inside this block + for err == nil { + if !p.Empty() && p.Indent() <= blk.Indent() { + break + } + err = p.Next() + } + } + } + } + if err == io.EOF { + err = nil + } + return +} + +func (doc *Document) Write(w io.Writer) error { + bw := bufio.NewWriter(w) + if doc.Title != "" { + if err := writeIndent(bw, "title", 0); err != nil { + return err + } + if err := writeIndent(bw, Escape(doc.Title), 1); err != nil { + return err + } + } + if len(doc.Links) > 0 { + if err := writeIndent(bw, "links", 0); err != nil { + return err + } + for _, link := range doc.Links { + if err := link.WriteIndent(bw, 1); err != nil { + return err + } + } + } + if len(doc.Site.Children) > 0 { + if err := writeIndent(bw, "site", 0); err != nil { + return err + } + for _, site := range doc.Site.Children { + if err := site.WriteIndent(bw, 1); err != nil { + return err + } + } + } + if doc.Content != nil { + if err := doc.Content.WriteIndent(bw, 0); err != nil { + return err + } + } + return bw.Flush() +} + +func (doc *Document) parseTitle(p *Parser, block *TokenBlock) (err error) { + s, err := getSimpleText(p, block) + if doc.Title == "" { + doc.Title = s + } else { + doc.Title += " " + s + } + return +} + +func (doc *Document) parseLinks(p *Parser, block *TokenBlock) (err error) { + for err == nil { + if !p.Empty() && p.Indent() <= block.Indent() { + break + } + + token := p.Block() + if blk, ok := token.(*TokenBlock); ok { + if blk.Name == "" { + err = parseUnknown(p, blk) + } else { + link := Link{ + URL: blk.Name, + Name: strings.Join(blk.Args, " "), + } + doc.Links = append(doc.Links, link) + if err = p.Next(); err != nil { + break + } + doc.Links[len(doc.Links)-1].Description, err = getSimpleText(p, blk) + } + } + } + return +} + +func getSimpleText(p *Parser, block *TokenBlock) (s string, err error) { + for err == nil { + if !p.Empty() && p.Indent() <= block.Indent() { + break + } + + token := p.SimpleText() + if text, ok := token.(*TokenSimpleText); ok && text.Text != "" { + if s == "" { + s = text.Text + } else { + s += " " + text.Text + } + } + + err = p.Next() + } + return +} + +// Link represents a document-level hyperlink in the "links" top-level block. +type Link struct { + // URL is the hyperlink URL. + URL string + + // Name is the hyperlink text. + Name string + + // Description is the description of the hyperlink. + Description string +} + +// WriteIndent writes the link URL, name and description indented by n tabs. +func (link Link) WriteIndent(w io.Writer, n int) error { + s := Escape(link.URL) + if link.Name != "" { + s += " " + Escape(link.Name) + } + if err := writeIndent(w, s, n); err != nil { + return err + } + if link.Description != "" { + if err := writeIndent(w, Escape(link.Description), n+1); err != nil { + return err + } + } + return nil +} + +// Site represents a node in the sitemap in the "site" top-level block. +type Site struct { + // Path is the node's path fragment. + Path string + + // Name is the node's name. + Name string + + // Children are the nodes below this node. + Children []Site +} + +// WriteIndent writes the sitemap indented by n tabs. +func (site Site) WriteIndent(w io.Writer, n int) error { + s := Escape(site.Path) + if site.Name != "" { + s += " " + Escape(site.Name) + } + if err := writeIndent(w, s, n); err != nil { + return err + } + for _, ch := range site.Children { + if err := ch.WriteIndent(w, n+1); err != nil { + return err + } + } + return nil +} + +func (site *Site) parse(p *Parser, block *TokenBlock) (err error) { + for err == nil { + if !p.Empty() && p.Indent() <= block.Indent() { + break + } + + token := p.Block() + if blk, ok := token.(*TokenBlock); ok { + if blk.Name == "" { + err = parseUnknown(p, blk) + } else { + s := Site{ + Path: strings.Trim(path.Clean(blk.Name), "/"), + Name: strings.Join(blk.Args, " "), + } + site.Children = append(site.Children, s) + if err = p.Next(); err != nil { + break + } + err = site.Children[len(site.Children)-1].parse(p, blk) + } + } else { + err = p.Next() + } + } + return +} + +func parseUnknown(p *Parser, block *TokenBlock) (err error) { + err = p.Next() + for err == nil { + if !p.Empty() && p.Indent() <= block.Indent() { + break + } + // discard lines inside this block + err = p.Next() + } + return +} + +func writeIndent(w io.Writer, s string, depth int) error { + const tabs = "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" + + if s == "" { + _, err := w.Write([]byte{'\n'}) + return err + } + if depth == 0 { + _, err := w.Write([]byte(s + "\n")) + return err + } + + var ind string + if depth <= len(tabs) { + ind = tabs[:depth] + } else { + ind = strings.Repeat("\t", depth) + } + _, err := w.Write([]byte(ind + s + "\n")) + return err +} -- cgit