# Reconstructed from the patch for commit
# e37f8e8e38cac6d1c93a4c4bff5874deba4ec5bf (j-james, Fri, 1 Jul 2022),
# "Implement constructing a basic layout tree from an HTML tree",
# which creates src/layout.nim.

import std/options
import formats/html

# const inline_elements = []

# Tags whose presence as a direct child makes the enclosing element lay
# out in block mode.
const block_elements = [
  "html", "body", "article", "section", "nav", "aside",
  "h1", "h2", "h3", "h4", "h5", "h6", "hgroup", "header",
  "footer", "address", "p", "hr", "pre", "blockquote",
  "ol", "ul", "menu", "li", "dl", "dt", "dd", "figure",
  "figcaption", "main", "div", "table", "form", "fieldset",
  "legend", "details", "summary"
]

type LayoutKind = enum
  Inline, Block, Document

type Layout = ref object
  ## One node of the layout tree, wrapping a single HTML `Node`.
  ##
  ## This has to be a `ref object`: the type refers to itself through
  ## `parent`, `previous` and `children`.
  case kind*: LayoutKind
  of Inline:
    weight: int
  of Block, Document:
    discard
  node: Node                ## the HTML node this layout was built from
  parent: Option[Layout]    ## enclosing layout; `none` at the root
  previous: Option[Layout]  ## preceding sibling; `none` for a first child
  children: seq[Layout]     ## child layouts, in the node's child order
  # x, y, width, height: float

func layoutMode(node: Node): LayoutKind =
  ## Decides how `node` is laid out, looking only at its direct children.
  ##
  ## * Text nodes are `Inline`.
  ## * Elements without children are `Block`.
  ## * Elements with at least one element child whose tag is listed in
  ##   `block_elements` are `Block`; all other elements are `Inline`.
  case node.kind
  of Element:
    if node.children.len == 0:
      result = Block
    else:
      result = Inline
      for child in node.children:
        if child.kind == Element and child.tag in block_elements:
          result = Block
          break
  of Text:
    result = Inline

func layout(node: Node, parent: Option[Layout],
            previous: Option[Layout]): Layout =
  ## Recursively constructs the layout subtree rooted at `node`.
  ##
  ## `parent` and `previous` are stored on the returned layout as given.
  ## Every child layout gets the returned layout as its `parent` and the
  ## child built just before it as its `previous`.
  # The kind is computed up front so that `result` can be constructed once,
  # with its final discriminator, *before* recursing. The children must be
  # handed a reference to the very object that is returned; building a
  # second object at the end would leave them pointing at a placeholder.
  result = Layout(kind: node.layoutMode, node: node, parent: parent,
                  previous: previous, children: @[])
  case node.kind
  of Element:
    var prevChild = none(Layout)
    for child in node.children:
      let current = child.layout(parent = some(result),
                                 previous = prevChild)
      result.children.add(current)
      prevChild = some(current)
  of Text:
    discard

# Assuming the first node of an HTML object is the <html> tag.
# Right now, HTML generation is Bad so this will change in the future
func layout(html: Html): Layout =
  ## Builds the `Document` layout for `html`.
  ##
  ## The document wraps `html[0]`, and the children of that node become
  ## the document's children, linked to the document as `parent` and to
  ## each other through `previous`. `html` must hold at least one node.
  result = Layout(kind: Document, node: html[0], parent: none(Layout),
                  previous: none(Layout), children: @[])
  var prevChild = none(Layout)
  for child in html[0].children:
    let current = child.layout(parent = some(result), previous = prevChild)
    result.children.add(current)
    prevChild = some(current)

# TODO: change Html into a distinct Node and adjust layout accordingly

#[type DocumentLayout = ref object
  node: Node
  children: seq[Layout]]#

when isMainModule:
  import formats/uri, protocols/http, std/strutils

  proc printLayout(layout: Layout, indentation = 0) =
    ## Writes the element nodes of a layout tree to stdout, one per line,
    ## indenting each nesting level by two more spaces. Each line shows
    ## the tag, the layout kind and, when present, the parent's tag.
    if layout.node.kind == Element:
      stdout.write(" ".repeat(indentation), layout.node.tag, " ",
                   layout.kind)
      # Only the parent's tag is printed: parent and children reference
      # each other, so dumping the parent structurally would walk a cycle.
      if layout.parent.isSome and layout.parent.get.node.kind == Element:
        stdout.write(" (parent: ", layout.parent.get.node.tag, ")")
      stdout.write("\n")
    for child in layout.children:
      child.printLayout(indentation + 2)

  let text = parseHTML(
    httpRequest(parseURL("https://example.org:443/index.html")).body)
  printLayout layout(text)
  for node in text:
    printLayout node.layout(none(Layout), none(Layout))