import std/strutils

# https://datatracker.ietf.org/doc/html/rfc3986

type
  Url* = object
    ## The components of a URI as laid out in RFC 3986. The trailing comment
    ## on each field is the delimiter that introduces that component.
    scheme*: string    # :
    authority*: string # //
    userinfo*: string  # @
    host*: string      # .
    port*: int         # :
    path*: string      # /
    query*: string     # ?
    fragment*: string  # #

const
  gendelims* = {':', '/', '?', '#', '[', ']', '@'}
  subdelims* = {'!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '='}
  reserved* = gendelims + subdelims
  unreserved* = {'a'..'z', 'A'..'Z', '0'..'9', '-', '.', '_', '~'}
  # plus, whitespace and control characters
  otherchars* = {'"', '%', '<', '>', '\\', '^', '`', '{', '|', '}'}

# This might be spec-compliant.
func parseURL*(url: string): Url =
  ## Splits `url` into scheme, authority (userinfo, host, port), path, query
  ## and fragment.
  ##
  ## Raises `RangeDefect` when `url` contains no ':' (no scheme) and
  ## `ValueError` when the port is not a decimal number. An empty port
  ## ("host:") is accepted and leaves `port` at 0.

  # Each URI must begin with a scheme name.
  # Scheme names consist of a sequence of characters beginning with a letter,
  # and followed by any combination of letters, digits, plus, period, or
  # hyphen.
  let schemeEnd = url.find(':')
  if schemeEnd < 0:
    raise newException(RangeDefect, "URL does not begin with a scheme")

  # Although schemes are case-insensitive, an implementation should produce
  # lowercase scheme names for consistency.
  result.scheme = url.substr(0, schemeEnd - 1).toLowerAscii
  assert allCharsInSet(result.scheme, {'a'..'z', '0'..'9', '+', '-', '.'})

  # Everything after the scheme's colon; consumed piece by piece below.
  var rest = url.substr(schemeEnd + 1)

  # The authority component is preceded by a double slash, and is terminated
  # by the next slash, question mark, or pound sign, or by the end of the URI.
  # `startsWith` is safe when fewer than two characters follow the colon.
  if rest.startsWith("//"):
    let authorityEnd = rest.find({'/', '?', '#'}, start = 2)
    result.authority =
      if authorityEnd < 0: rest.substr(2)
      else: rest.substr(2, authorityEnd - 1)
    rest = rest.substr(2 + result.authority.len)

  # A fragment identifier component is indicated by the presence of a pound
  # sign character, and terminated by the end of the URI.
  let fragmentAt = rest.find('#')
  if fragmentAt >= 0:
    result.fragment = rest.substr(fragmentAt + 1)
    rest.setLen(fragmentAt)

  # The query component is indicated by the first question mark character,
  # and terminated by a pound sign or the end of the URI.
  let queryAt = rest.find('?')
  if queryAt >= 0:
    result.query = rest.substr(queryAt + 1)
    rest.setLen(queryAt)

  # Todo: path parsing is actually more complex. this is cheating.
  # Any run of leading slashes is collapsed into a single one.
  if rest.len > 0:
    result.path =
      "/" & rest.strip(leading = true, trailing = false, chars = {'/'})

  if result.authority.len > 0:
    var hostPort = result.authority

    # The userinfo subcomponent, if present, is followed by an at-sign
    # that delimits it from the host. It is removed first so that a colon
    # inside it ("user:password@host") is not mistaken for the port
    # delimiter.
    let atSign = hostPort.find('@')
    if atSign >= 0:
      result.userinfo = hostPort.substr(0, atSign - 1)
      hostPort = hostPort.substr(atSign + 1)

    # The port subcomponent, if present, is designated by an optional port
    # number in decimal following the host and delimited from it by a single
    # colon. A ']' after the last colon means that colon belongs to an IPv6
    # literal, so there is no port.
    let colon = hostPort.rfind(':')
    if colon >= 0 and ']' notin hostPort.substr(colon + 1):
      let portText = hostPort.substr(colon + 1)
      # Validate before parsing so that signs and other junk are rejected.
      if not allCharsInSet(portText, {'0'..'9'}):
        raise newException(ValueError, "invalid port: " & portText)
      if portText.len > 0:
        result.port = portText.parseInt
      hostPort.setLen(colon)

    # Todo: parse hosts
    result.host = hostPort
    # Todo: is this assert right?
    assert allCharsInSet(result.host, unreserved + subdelims + {':', '[', ']'})

#[ Todo: advanced resolution and relative references
  # A relative reference takes advantage of the hierarchical syntax to express
  # a reference relative to the name space of another hierarchical URI.
  if url.len == 0:
    raise newException(RangeDefect, "URL is an empty string")
  # Relative path reference
  elif url[0] == '.':
    return
  # Network path reference
  elif url.len > 1 and url[0..1] == "//":
    return
  # Absolute path reference
  elif url[0] == '/':
    discard url.split('/', maxsplit=1)
]#

#[
# This is compact. But it's probably not spec-compliant.
func parseURL*(url: string): Url =
  # https://google.com/path?query#fragment
  # https, //google.com/path?query#fragment
  var split: seq[string] = url.split(':', maxsplit=1)
  if split.len == 2:
    result.scheme = split[0]
    # //google.com/path?query, fragment
    split = split[1].rsplit('#', maxsplit=1)
    if split.len == 2:
      result.fragment = split[1]
    # //google.com/path, query
    # //google.com/, query
    split = split[0].rsplit('?', maxsplit=1)
    if split.len == 2:
      result.query = split[1]
    # //google.com/path
    # //google.com
    # somethingelse
    if split[0][0..1] == "//":
      split = split[0].strip(leading=true, trailing=false, {'/'}).split('/', maxsplit=1)
      result.authority = split[0]
      if split.len == 2:
        result.path = split[1]
    else:
      # Note: This is a weird state.
      result.path = split[0].strip(leading=true, trailing=true, {'/'})
      debugEcho split[0]
    # userinfo@host:port
    # userinfo, host:port
    var remainder = result.authority
    split = result.authority.split('@', maxsplit=1)
    if split.len == 2:
      result.userinfo = split[0]
      remainder = split[1]
    # host, port
    split = remainder.rsplit(':', maxsplit=1)
    remainder = split[0]
    if split.len == 2:
      result.port = split[1].parseInt
    result.host = split[0]
  else:
    raise newException(OSError, "URL does not contain a colon")
]#

# assert parseURL("google.com") == Url(authority: "google.com", host: "google.com")
# assert parseURL("http://") == Url(scheme: "http")
assert parseURL("https://google.com") ==
  Url(scheme: "https", authority: "google.com", host: "google.com")
assert parseURL("https://google.com/path//path") ==
  Url(scheme: "https", authority: "google.com", host: "google.com",
      path: "/path//path")
assert parseURL("https://google.com/path?query") ==
  Url(scheme: "https", authority: "google.com", host: "google.com",
      path: "/path", query: "query")
assert parseURL("https://google.com/path/?query") ==
  Url(scheme: "https", authority: "google.com", host: "google.com",
      path: "/path/", query: "query")
assert parseURL("https://google.com?query") ==
  Url(scheme: "https", authority: "google.com", host: "google.com",
      query: "query")
assert parseURL("https://google.com/path#fragment") ==
  Url(scheme: "https", authority: "google.com", host: "google.com",
      path: "/path", fragment: "fragment")
assert parseURL("https://google.com/path#fragment?query") ==
  Url(scheme: "https", authority: "google.com", host: "google.com",
      path: "/path", fragment: "fragment?query")
assert parseURL("https://192.168.0.1/path#fragment?query") ==
  Url(scheme: "https", authority: "192.168.0.1", host: "192.168.0.1",
      path: "/path", fragment: "fragment?query")
assert parseURL("https://12:12:12:12/path#fragment?query") ==
  Url(scheme: "https", authority: "12:12:12:12", host: "12:12:12", port: 12,
      path: "/path", fragment: "fragment?query")
assert parseURL("https://[12:213:213:fr:12]:1200/path#fragment?query") ==
  Url(scheme: "https", authority: "[12:213:213:fr:12]:1200",
      host: "[12:213:213:fr:12]", port: 1200, path: "/path",
      fragment: "fragment?query")
assert parseURL("https://userinfo@host.com:8080/path#fragment?query") ==
  Url(scheme: "https", authority: "userinfo@host.com:8080",
      userinfo: "userinfo", host: "host.com", port: 8080, path: "/path",
      fragment: "fragment?query")

assert parseURL("ftp://ftp.is.co.za/rfc/rfc1808.txt") ==
  Url(scheme: "ftp", authority: "ftp.is.co.za", host: "ftp.is.co.za",
      path: "/rfc/rfc1808.txt")
assert parseURL("http://www.ietf.org/rfc/rfc2396.txt") ==
  Url(scheme: "http", authority: "www.ietf.org", host: "www.ietf.org",
      path: "/rfc/rfc2396.txt")
assert parseURL("ldap://[2001:db8::7]/c=GB?objectClass?one") ==
  Url(scheme: "ldap", authority: "[2001:db8::7]", host: "[2001:db8::7]",
      path: "/c=GB", query: "objectClass?one")
# Todo: Ugh, are these right?
assert parseURL("mailto:John.Doe@example.com") ==
  Url(scheme: "mailto", path: "/John.Doe@example.com") # ugh is this right
assert parseURL("news:comp.infosystems.www.servers.unix") ==
  Url(scheme: "news", path: "/comp.infosystems.www.servers.unix")
assert parseURL("tel:+1-816-555-1212") ==
  Url(scheme: "tel", path: "/+1-816-555-1212")
assert parseURL("telnet://192.0.2.16:80/") ==
  Url(scheme: "telnet", authority: "192.0.2.16:80", host: "192.0.2.16",
      port: 80, path: "/")
assert parseURL("urn:oasis:names:specification:docbook:dtd:xml:4.1.2") ==
  Url(scheme: "urn", path: "/oasis:names:specification:docbook:dtd:xml:4.1.2")

func encodeURL(url: string): string =
  ## Percent-encodes every character of `url` that is not in `unreserved`,
  ## writing it as '%' followed by two uppercase hex digits.
  for c in url:
    if c in unreserved:
      result.add c
    # Note: `+`?
    else:
      result.add '%'
      result.add toHex(ord(c), 2)