From aace82a89073d3547856742e9f3a403b1f3fba03 Mon Sep 17 00:00:00 2001
From: j-james
Date: Fri, 1 Jul 2022 22:58:19 -0700
Subject: Move URI handling and rename node.nested to node.children

---
 src/browser.nim        |  11 +--
 src/formats/html.nim   |  15 ++--
 src/formats/uri.nim    | 182 +++++++++++++++++++++++++++++++++++++++++++++++++
 src/gui/terminal.nim   |   4 +-
 src/protocols/http.nim |   2 +-
 src/uri.nim            | 182 -------------------------------------------------
 6 files changed, 199 insertions(+), 197 deletions(-)
 create mode 100644 src/formats/uri.nim
 delete mode 100644 src/uri.nim

(limited to 'src')
diff --git a/src/browser.nim b/src/browser.nim
index 97713a3..c8fdea8 100644
--- a/src/browser.nim
+++ b/src/browser.nim
@@ -1,21 +1,21 @@
-import std/strutils, protocols/http, html, uri
+import std/strutils, protocols/http, formats/[html, uri], gui/terminal
 
 let url = "https://example.org:443/index.html"
 # let url = paramStr(1)
 
-proc request(uri: string) =
+proc request*(uri: string) =
   # This is probably the best place to implement scheme-specific stuff
   let url = parseURL(uri)
   case url.scheme:
     of "http", "https":
       let response = httpRequest(url)
-      renderHTML(response.body)
+      render(parseHTML(response.body))
     # Exercise: view-source
     of "view-source":
       # We must parse the url again without the view-source: prefix
       let url = uri.split(':', maxsplit=1)[1]
       let response = httpRequest(parseURL(url))
-      renderSource(response.body)
+      renderSource(parseHTML((response.body)))
     # Exercise: file:// scheme
     of "file":
       discard
@@ -25,7 +25,8 @@ proc request(uri: string) =
     else:
       raise newException(Exception, "Not a valid scheme: " & url.scheme)
 
-request(url)
+when isMainModule:
+  request(url)
 
 # HTTP/1.1
 # Compression
diff --git a/src/formats/html.nim b/src/formats/html.nim
index 8724295..8299a51 100644
--- a/src/formats/html.nim
+++ b/src/formats/html.nim
@@ -13,16 +13,17 @@ type NodeKind* = enum
 # Clever node implementation from callsamu and XmlNodeObj
 # Note that Text nodes are _only_ text.
 # ex. this <a>test</a> node is three nodes: "this ", " node", and the <a> tag
-type Node* {.acyclic.} = object
+type Node* = ref object
+  # parent*: Node # Unfortunately, we will have to deal with cycles.
   case kind*: NodeKind:
     of Text:
       text*: string
     of Element:
       tag*: string
       attributes*: Table[string, string] # change
-      nested*: seq[Node]
+      children*: seq[Node]
 
-# Note that even plain text is valid HTML.
+# Note that even plain text is valid HTML, in this implementation.
 type Html* = seq[Node]
 
 type ParserState = enum
@@ -54,7 +55,7 @@ func conclude(buffer: string, unfinished: var seq[Node], result: var Html) =
     let split: seq[string] = buffer.strip(false, true, {'/'}).strip().split(' ')
     let tag = split[0].toLower
     let attributes = split[1..^1].attributes
-    let node = Node(kind: Element, tag: tag, attributes: attributes, nested: @[])
+    let node = Node(kind: Element, tag: tag, attributes: attributes, children: @[])
 
     # If we're in a self-closing tag:
     if tag in self_closing_tags:
@@ -64,7 +65,7 @@ func conclude(buffer: string, unfinished: var seq[Node], result: var Html) =
     if tag.len > 0 and tag[0] == '/' or tag in self_closing_tags:
       # Add the element to the parent node
       if unfinished.len > 1:
-        unfinished[^2].nested.add(unfinished.pop)
+        unfinished[^2].children.add(unfinished.pop)
       # Or, if there is no parent node, add the element to the result
       else:
         result.add(unfinished.pop)
@@ -75,7 +76,7 @@ func conclude(buffer: string, unfinished: var seq[Node], result: var Html) =
 
 func finish(unfinished: var seq[Node], result: var seq[Node]) =
   while unfinished.len > 1:
-    unfinished[^2].nested.add(unfinished.pop)
+    unfinished[^2].children.add(unfinished.pop)
   if unfinished.len == 1:
     result.add(unfinished.pop)
 
@@ -90,7 +91,7 @@ func parseHTML*(html: string): Html =
     if not in_tag and c == '<':
       # Add the collected text content to the parent node, if there is text
       if buffer.strip() != "":
-        unfinished[^1].nested.add(Node(kind: Text, text: buffer))
+        unfinished[^1].children.add(Node(kind: Text, text: buffer))
       in_tag = true
       buffer = ""
     # End of a tag
diff --git a/src/formats/uri.nim b/src/formats/uri.nim
new file mode 100644
index 0000000..e2fc7f3
--- /dev/null
+++ b/src/formats/uri.nim
@@ -0,0 +1,182 @@
+import std/strutils
+
+# https://datatracker.ietf.org/doc/html/rfc3986
+type Url* = object
+  scheme*: string    # :
+  authority*: string # //
+  userinfo*: string  # @
+  host*: string      # .
+  port*: int         # :
+  path*: string      # /
+  query*: string     # ?
+  fragment*: string  # #
+
+const
+  gendelims* = {':', '/', '?', '#', '[', ']', '@'}
+  subdelims* = {'!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '='}
+  reserved* = gendelims + subdelims
+  unreserved* = {'a'..'z', 'A'..'Z', '0'..'9', '-', '.', '_', '~'}
+  otherchars* = {'"', '%', '<', '>', '\\', '^', '`', '{', '|', '}', } # plus, whitespace and control characters
+
+# This might be spec-compliant.
+func parseURL*(url: string): Url =
+  # Each URI must begin with a scheme name.
+  # Scheme names consist of a sequence of characters beginning with a letter,
+  # and followed by any combination of letters, digits, plus, period, or hyphen.
+  var split = url.split(':', maxsplit=1)
+  if split.len == 2:
+    # Although schemes are case-insensitive, an implementation should produce
+    # lowercase scheme names for consistency.
+    result.scheme = split[0].toLower
+    assert allCharsInSet(result.scheme, {'a'..'z', '0'..'9', '+', '-', '.'})
+
+    # The authority component is preceded by a double slash, and is terminated by
+    # the next slash, question mark, or pound sign, or by the end of the URI.
+    if split[1][0..1] == "//":
+      result.authority = split[1][2..^1].split({'/', '?', '#'}, maxsplit=1)[0]
+      split[1] = split[1][result.authority.len + 2 ..< split[1].len]
+
+    # A fragment identifier component is indicated by the presence of a pound sign
+    # character, and terminated by the end of the URI.
+    split = split[1].split('#', maxsplit=1)
+    if split.len == 2:
+      result.fragment = split[1]
+
+    # The query component is indicated by the first question mark character,
+    # and terminated by a pound sign or the end of the URI.
+    split = split[0].split('?', maxsplit=1)
+    if split.len == 2:
+      result.query = split[1]
+
+    # Todo: path parsing is actually more complex. this is cheating.
+    if split[0] != "":
+      result.path = "/" & split[0].strip(leading=true, trailing=false, {'/'})
+
+    if result.authority != "":
+      # The port subcomponent, if present, is designated by an optional port number
+      # in decimal following the host and delimited from it by a single colon.
+      split = result.authority.rsplit({':'}, maxsplit=1)
+      if split.len == 2:
+        # If we accidentally parsed an IPv6 address: reconstruct
+        if ']' in split[1]:
+          split[0] = split[0] & ":" & split[1]
+          split[1] = ""
+        else:
+          result.port = split[1].parseInt
+          assert allCharsInSet(split[1], {'0'..'9'})
+
+      # The userinfo subcomponent, if present, is followed by an at-sign
+      # that delimits it from the host.
+      split = split[0].split('@', maxsplit=1)
+      if split.len == 2:
+        result.userinfo = split[0]
+        # Todo: parse hosts
+        result.host = split[1]
+      else:
+        result.host = split[0]
+      # Todo: is this assert right?
+      assert allCharsInSet(result.host, unreserved + subdelims + {':', '[', ']'})
+  else:
+    raise newException(RangeDefect, "URL does not begin with a scheme")
+
+  #[ Todo: advanced resolution and relative references
+  # A relative reference takes advantage of the hierarchical syntax to express
+  # a reference relative to the name space of another hierarchical URI.
+  if url.len == 0:
+    raise newException(RangeDefect, "URL is an empty string")
+  # Relative path reference
+  elif url[0] == '.':
+    return
+  # Network path reference
+  elif url.len > 1 and url[0..1] == "//":
+    return
+  # Absolute path reference
+  elif url[0] == '/':
+    discard url.split('/', maxsplit=1)
+  ]#
+
+#[
+# This is compact. But it's probably not spec-compliant.
+func parseURL*(url: string): Url =
+  # https://google.com/path?query#fragment
+  # https, //google.com/path?query#fragment
+  var split: seq[string] = url.split(':', maxsplit=1)
+  if split.len == 2:
+    result.scheme = split[0]
+
+    # //google.com/path?query, fragment
+    split = split[1].rsplit('#', maxsplit=1)
+    if split.len == 2:
+      result.fragment = split[1]
+
+    # //google.com/path, query
+    # //google.com/, query
+    split = split[0].rsplit('?', maxsplit=1)
+    if split.len == 2:
+      result.query = split[1]
+
+    # //google.com/path
+    # //google.com
+    # somethingelse
+    if split[0][0..1] == "//":
+      split = split[0].strip(leading=true, trailing=false, {'/'}).split('/', maxsplit=1)
+      result.authority = split[0]
+      if split.len == 2:
+        result.path = split[1]
+    else:
+      # Note: This is a weird state.
+      result.path = split[0].strip(leading=true, trailing=true, {'/'})
+      debugEcho split[0]
+
+    # userinfo@host:port
+    # userinfo, host:port
+    var remainder = result.authority
+    split = result.authority.split('@', maxsplit=1)
+    if split.len == 2:
+      result.userinfo = split[0]
+      remainder = split[1]
+
+    # host, port
+    split = remainder.rsplit(':', maxsplit=1)
+    remainder = split[0]
+    if split.len == 2:
+      result.port = split[1].parseInt
+
+    result.host = split[0]
+
+  else:
+    raise newException(OSError, "URL does not contain a colon")
+]#
+
+# assert parseURL("google.com") == Url(authority: "google.com", host: "google.com")
+# assert parseURL("http://") == Url(scheme: "http")
+assert parseURL("https://google.com") == Url(scheme: "https", authority: "google.com", host: "google.com")
+assert parseURL("https://google.com/path//path") == Url(scheme: "https", authority: "google.com", host: "google.com", path: "/path//path")
+assert parseURL("https://google.com/path?query") == Url(scheme: "https", authority: "google.com", host: "google.com", path: "/path", query: "query")
+assert parseURL("https://google.com/path/?query") == Url(scheme: "https", authority: "google.com", host: "google.com", path: "/path/", query: "query")
+assert parseURL("https://google.com?query") == Url(scheme: "https", authority: "google.com", host: "google.com", query: "query")
+assert parseURL("https://google.com/path#fragment") == Url(scheme: "https", authority: "google.com", host: "google.com", path: "/path", fragment: "fragment")
+assert parseURL("https://google.com/path#fragment?query") == Url(scheme: "https", authority: "google.com", host: "google.com", path: "/path", fragment: "fragment?query")
+assert parseURL("https://192.168.0.1/path#fragment?query") == Url(scheme: "https", authority: "192.168.0.1", host: "192.168.0.1", path: "/path", fragment: "fragment?query")
+assert parseURL("https://12:12:12:12/path#fragment?query") == Url(scheme: "https", authority: "12:12:12:12", host: "12:12:12", port: 12, path: "/path", fragment: "fragment?query")
+assert parseURL("https://[12:213:213:fr:12]:1200/path#fragment?query") == Url(scheme: "https", authority: "[12:213:213:fr:12]:1200", host: "[12:213:213:fr:12]", port: 1200, path: "/path", fragment: "fragment?query")
+assert parseURL("https://userinfo@host.com:8080/path#fragment?query") == Url(scheme: "https", authority: "userinfo@host.com:8080", userinfo: "userinfo", host: "host.com", port: 8080, path: "/path", fragment: "fragment?query")
+
+assert parseURL("ftp://ftp.is.co.za/rfc/rfc1808.txt") == Url(scheme: "ftp", authority: "ftp.is.co.za", host: "ftp.is.co.za", path: "/rfc/rfc1808.txt")
+assert parseURL("http://www.ietf.org/rfc/rfc2396.txt") == Url(scheme: "http", authority: "www.ietf.org", host: "www.ietf.org", path: "/rfc/rfc2396.txt")
+assert parseURL("ldap://[2001:db8::7]/c=GB?objectClass?one") == Url(scheme: "ldap", authority: "[2001:db8::7]", host: "[2001:db8::7]", path: "/c=GB", query: "objectClass?one")
+
+# Todo: Ugh, are these right?
+assert parseURL("mailto:John.Doe@example.com") == Url(scheme: "mailto", path: "/John.Doe@example.com") # ugh is this right
+assert parseURL("news:comp.infosystems.www.servers.unix") == Url(scheme: "news", path: "/comp.infosystems.www.servers.unix")
+assert parseURL("tel:+1-816-555-1212") == Url(scheme: "tel", path: "/+1-816-555-1212")
+assert parseURL("telnet://192.0.2.16:80/") == Url(scheme: "telnet", authority: "192.0.2.16:80", host: "192.0.2.16", port: 80, path: "/")
+assert parseURL("urn:oasis:names:specification:docbook:dtd:xml:4.1.2") == Url(scheme: "urn", path: "/oasis:names:specification:docbook:dtd:xml:4.1.2")
+
+func encodeURL(url: string): string =
+  for c in url:
+    if c in unreserved:
+      result &= c
+    # Note: `+`?
+    else:
+      result &= "%" & toHex(ord(c), 2)
diff --git a/src/gui/terminal.nim b/src/gui/terminal.nim
index e893081..f4c7c92 100644
--- a/src/gui/terminal.nim
+++ b/src/gui/terminal.nim
@@ -9,7 +9,7 @@ proc print(node: Node, indent=0, raw=false) =
         stdout.write(" " & attribute[0] & "=" & attribute[1])
       stdout.write(">")
       stdout.write('\n')
-    for i in node.nested:
+    for i in node.children:
       i.print(indent+2)
     if not raw:
       stdout.write(" ".repeat(indent))
@@ -28,7 +28,7 @@ proc renderSource*(html: Html) =
     node.print(0, true)
 
 when isMainModule:
-  import ../protocols/http, ../uri
+  import ../protocols/http, ../formats/uri
   let url = "https://example.org:443/index.html"
   let request = httpRequest(parseUrl(url))
   let parsed = parseHTML(request.body)
diff --git a/src/protocols/http.nim b/src/protocols/http.nim
index df812fd..2de11df 100644
--- a/src/protocols/http.nim
+++ b/src/protocols/http.nim
@@ -1,4 +1,4 @@
-import std/[strutils, net], ../uri
+import std/[strutils, net], ../formats/uri
 
 # https://datatracker.ietf.org/doc/html/rfc1945
 
diff --git a/src/uri.nim b/src/uri.nim
deleted file mode 100644
index e2fc7f3..0000000
--- a/src/uri.nim
+++ /dev/null
@@ -1,182 +0,0 @@
-import std/strutils
-
-# https://datatracker.ietf.org/doc/html/rfc3986
-type Url* = object
-  scheme*: string    # :
-  authority*: string # //
-  userinfo*: string  # @
-  host*: string      # .
-  port*: int         # :
-  path*: string      # /
-  query*: string     # ?
-  fragment*: string  # #
-
-const
-  gendelims* = {':', '/', '?', '#', '[', ']', '@'}
-  subdelims* = {'!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '='}
-  reserved* = gendelims + subdelims
-  unreserved* = {'a'..'z', 'A'..'Z', '0'..'9', '-', '.', '_', '~'}
-  otherchars* = {'"', '%', '<', '>', '\\', '^', '`', '{', '|', '}', } # plus, whitespace and control characters
-
-# This might be spec-compliant.
-func parseURL*(url: string): Url =
-  # Each URI must begin with a scheme name.
-  # Scheme names consist of a sequence of characters beginning with a letter,
-  # and followed by any combination of letters, digits, plus, period, or hyphen.
-  var split = url.split(':', maxsplit=1)
-  if split.len == 2:
-    # Although schemes are case-insensitive, an implementation should produce
-    # lowercase scheme names for consistency.
-    result.scheme = split[0].toLower
-    assert allCharsInSet(result.scheme, {'a'..'z', '0'..'9', '+', '-', '.'})
-
-    # The authority component is preceded by a double slash, and is terminated by
-    # the next slash, question mark, or pound sign, or by the end of the URI.
-    if split[1][0..1] == "//":
-      result.authority = split[1][2..^1].split({'/', '?', '#'}, maxsplit=1)[0]
-      split[1] = split[1][result.authority.len + 2 ..< split[1].len]
-
-    # A fragment identifier component is indicate by the presense of a pound sign
-    # character, and terminated by the end of the URI.
-    split = split[1].split('#', maxsplit=1)
-    if split.len == 2:
-      result.fragment = split[1]
-
-    # The query component is indicated by the first question mark character,
-    # and terminated by a pound sign or the end of the URI.
-    split = split[0].split('?', maxsplit=1)
-    if split.len == 2:
-      result.query = split[1]
-
-    # Todo: path parsing is actually more complex. this is cheating.
-    if split[0] != "":
-      result.path = "/" & split[0].strip(leading=true, trailing=false, {'/'})
-
-    if result.authority != "":
-      # The port subcomponent, if present, is designated by an optional port number
-      # in decimal following the host and deliminated from it by a single colon.
-      split = result.authority.rsplit({':'}, maxsplit=1)
-      if split.len == 2:
-        # If we accidentally parsed an IPv6 address: reconstruct
-        if ']' in split[1]:
-          split[0] = split[0] & ":" & split[1]
-          split[1] = ""
-        else:
-          result.port = split[1].parseInt
-          assert allCharsInSet(split[1], {'0'..'9'})
-
-      # The userinfo subcomponent, if present, is followed by an at-sign
-      # that delimits it from the host.
-      split = split[0].split('@', maxsplit=1)
-      if split.len == 2:
-        result.userinfo = split[0]
-        # Todo: parse hosts
-        result.host = split[1]
-      else:
-        result.host = split[0]
-      # Todo: is this assert right?
-      assert allCharsInSet(result.host, unreserved + subdelims + {':', '[', ']'})
-  else:
-    raise newException(RangeDefect, "URL does not begin with a scheme")
-
-  #[ Todo: advanced resolution and relative references
-  # A relative reference takes advantage of the hierarchical syntax to express
-  # a reference relative to the name space of another hierarchical URI.
-  if url.len == 0:
-    raise newException(RangeDefect, "URL is an empty string")
-  # Relative path reference
-  elif url[0] == '.':
-    return
-  # Network path reference
-  elif url.len > 1 and url[0..1] == "//":
-    return
-  # Absolute path reference
-  elif url[0] == '/':
-    discard url.split('/', maxsplit=1)
-  ]#
-
-#[
-# This is compact. But it's probably not spec-compliant.
-func parseURL*(url: string): Url =
-  # https://google.com/path?query#fragment
-  # https, //google.com/path?query#fragment
-  var split: seq[string] = url.split(':', maxsplit=1)
-  if split.len == 2:
-    result.scheme = split[0]
-
-    # //google.com/path?query, fragment
-    split = split[1].rsplit('#', maxsplit=1)
-    if split.len == 2:
-      result.fragment = split[1]
-
-    # //google.com/path, query
-    # //google.com/, query
-    split = split[0].rsplit('?', maxsplit=1)
-    if split.len == 2:
-      result.query = split[1]
-
-    # //google.com/path
-    # //google.com
-    # somethingelse
-    if split[0][0..1] == "//":
-      split = split[0].strip(leading=true, trailing=false, {'/'}).split('/', maxsplit=1)
-      result.authority = split[0]
-      if split.len == 2:
-        result.path = split[1]
-    else:
-      # Note: This is a weird state.
-      result.path = split[0].strip(leading=true, trailing=true, {'/'})
-      debugEcho split[0]
-
-    # userinfo@host:port
-    # userinfo, host:port
-    var remainder = result.authority
-    split = result.authority.split('@', maxsplit=1)
-    if split.len == 2:
-      result.userinfo = split[0]
-      remainder = split[1]
-
-    # host, port
-    split = remainder.rsplit(':', maxsplit=1)
-    remainder = split[0]
-    if split.len == 2:
-      result.port = split[1].parseInt
-
-    result.host = split[0]
-
-  else:
-    raise newException(OSError, "URL does not contain a colon")
-]#
-
-# assert parseURL("google.com") == Url(authority: "google.com", host: "google.com")
-# assert parseURL("http://") == Url(scheme: "http")
-assert parseURL("https://google.com") == Url(scheme: "https", authority: "google.com", host: "google.com")
-assert parseURL("https://google.com/path//path") == Url(scheme: "https", authority: "google.com", host: "google.com", path: "/path//path")
-assert parseURL("https://google.com/path?query") == Url(scheme: "https", authority: "google.com", host: "google.com", path: "/path", query: "query")
-assert parseURL("https://google.com/path/?query") == Url(scheme: "https", authority: "google.com", host: "google.com", path: "/path/", query: "query")
-assert parseURL("https://google.com?query") == Url(scheme: "https", authority: "google.com", host: "google.com", query: "query")
-assert parseURL("https://google.com/path#fragment") == Url(scheme: "https", authority: "google.com", host: "google.com", path: "/path", fragment: "fragment")
-assert parseURL("https://google.com/path#fragment?query") == Url(scheme: "https", authority: "google.com", host: "google.com", path: "/path", fragment: "fragment?query")
-assert parseURL("https://192.168.0.1/path#fragment?query") == Url(scheme: "https", authority: "192.168.0.1", host: "192.168.0.1", path: "/path", fragment: "fragment?query")
-assert parseURL("https://12:12:12:12/path#fragment?query") == Url(scheme: "https", authority: "12:12:12:12", host: "12:12:12", port: 12, path: "/path", fragment: "fragment?query")
-assert parseURL("https://[12:213:213:fr:12]:1200/path#fragment?query") == Url(scheme: "https", authority: "[12:213:213:fr:12]:1200", host: "[12:213:213:fr:12]", port: 1200, path: "/path", fragment: "fragment?query")
-assert parseURL("https://userinfo@host.com:8080/path#fragment?query") == Url(scheme: "https", authority: "userinfo@host.com:8080", userinfo: "userinfo", host: "host.com", port: 8080, path: "/path", fragment: "fragment?query")
-
-assert parseURL("ftp://ftp.is.co.za/rfc/rfc1808.txt") == Url(scheme: "ftp", authority: "ftp.is.co.za", host: "ftp.is.co.za", path: "/rfc/rfc1808.txt")
-assert parseURL("http://www.ietf.org/rfc/rfc2396.txt") == Url(scheme: "http", authority: "www.ietf.org", host: "www.ietf.org", path: "/rfc/rfc2396.txt")
-assert parseURL("ldap://[2001:db8::7]/c=GB?objectClass?one") == Url(scheme: "ldap", authority: "[2001:db8::7]", host: "[2001:db8::7]", path: "/c=GB", query: "objectClass?one")
-
-# Todo: Ugh, are these right?
-assert parseURL("mailto:John.Doe@example.com") == Url(scheme: "mailto", path: "/John.Doe@example.com") # ugh is this right
-assert parseURL("news:comp.infosystems.www.servers.unix") == Url(scheme: "news", path: "/comp.infosystems.www.servers.unix")
-assert parseURL("tel:+1-816-555-1212") == Url(scheme: "tel", path: "/+1-816-555-1212")
-assert parseURL("telnet://192.0.2.16:80/") == Url(scheme: "telnet", authority: "192.0.2.16:80", host: "192.0.2.16", port: 80, path: "/")
-assert parseURL("urn:oasis:names:specification:docbook:dtd:xml:4.1.2") == Url(scheme: "urn", path: "/oasis:names:specification:docbook:dtd:xml:4.1.2")
-
-func encodeURL(url: string): string =
-  for c in url:
-    if c in unreserved:
-      result &= c
-    # Note: `+`?
-    else:
-      result &= "%" & toHex(ord(c), 2)
-- 
cgit v1.2.3-70-g09d2