A Clojure(Script) Implementation Of The WHATWG URL Spec
A port of the WHATWG URL spec in Clojure(Script).
The WHATWG URL spec is really handy for parsing URLs and thought it useful to port it to Clojure(Script).
Usage
(:require [com.evanlouie.net.url :refer [parse]])
(parse "https://evan:my-password@some.sub.domain.evanlouie.com:1337/path/to/page?query1=foo&query2=bar#header2")
;; => #:com.evanlouie.net.url{:hash "#header2", :protocol "https:", :password "my-password", :pathname "/path/to/page", :username "evan", :hostname "some.sub.domain.evanlouie.com", :port "1337", :search "?query1=foo&query2=bar", :host "some.sub.domain.evanlouie.com:1337", :origin "https://some.sub.domain.evanlouie.com:1337", :href "https://evan:my-password@some.sub.domain.evanlouie.com:1337/path/to/page?query1=foo&query2=bar#header2"}
See the tests in the snippet for more usage examples.
src/com/evanlouie/net/url.cljc
(ns com.evanlouie.net.url
(:require [clojure.spec.alpha :as s]))
(def ^:private url-regex
"A regex to parse a URL.
Note: matching this regex does not imply a valid href as it allows for an
empty pathname to be provided (i.e. no trailing slash when at root domain).
Hrefs require a non empty pathname (at least a trailing `/`). For a regex which
validates for hrefs, refer to the `::href` spec.
e.g. This regex will match `http://www.evanlouie.com` but is not a valid href
| URL Segment | Required/Optional |
| --------------------- | ----------------- |
| Protocol | Required |
| Hostname | Required |
| Username | Optional |
| Password | Optional |
| Port | Optional |
| Pathname | Optional |
| Search/Get-Parameters | Optional |
| Hash | Optional |
@example (re-matches url-regex \"http://foobar.com\")
@example (re-matches url-regex \"https://foo:bar@some.nested.dns.com:1337/path/to/page?query=some-value#some-header\")"
#"^([a-zA-Z]+:)//(([^:]+)(:(.+))?@)?(([a-zA-Z0-9][a-zA-Z0-9-]{0,61}[a-zA-Z0-9]\.)+)([a-zA-Z]{2,63})(:(\d+))?(/([^?#]+)?(\?([^#]+)?)?(#(.+)?)?)?$")
;;------------------------------------------------------------------------------
;; Specs
(s/def ::url (s/and string? #(re-matches url-regex %)))
(s/def ::hash (s/nilable (s/and string? #(re-matches #"^(#.+)?$" %))))
(s/def ::host (s/and string? #(re-matches #"^([a-zA-Z0-9][a-zA-Z0-9-]{0,61}[a-zA-Z0-9]\.)+[a-zA-Z]{2,63}(:(\d+))?$" %)))
(s/def ::hostname (s/and string? #(re-matches #"^([a-zA-Z0-9][a-zA-Z0-9-]{0,61}[a-zA-Z0-9]\.)+[a-zA-Z]{2,63}$" %)))
(s/def ::href (s/and string? #(re-matches #"^[a-zA-Z]+://([^:]+(:(.+))?@)?([a-zA-Z0-9][a-zA-Z0-9-]{0,61}[a-zA-Z0-9]\.)+([a-zA-Z]{2,63})(:(\d+))?/(([^?#]+)?(\?([^#]+)?)?(#(.+)?)?)?$" %)))
(s/def ::origin (s/and string? #(re-matches #"^[a-zA-Z]+://([a-zA-Z0-9][a-zA-Z0-9-]{0,61}[a-zA-Z0-9]\.)+[a-zA-Z]{2,63}(:(\d+))?$" %)))
(s/def ::password (s/nilable string?))
(s/def ::pathname (s/nilable (s/and string? #(re-matches #"^/(([^?#]+)?)?$" %))))
(s/def ::port (s/nilable (s/and string? #(re-matches #"^(\d+)?$" %))))
(s/def ::protocol (s/and string? #(re-matches #"^[a-zA-Z]+:$" %)))
(s/def ::search (s/nilable (s/and string? #(re-matches #"^(\?[^#]+)?$" %))))
(s/def ::username (s/nilable string?))
(s/def ::location (s/keys :req [::hash
::host
::hostname
::href
::origin
::password
::pathname
::port
::protocol
::search
::username]))
(s/fdef parse
:args (s/cat :url ::url)
:ret ::location)
;;------------------------------------------------------------------------------
;; Functions
(defn parse
"Parses the provided `url` into a map modelled after the HTML Living Standard
Location specification.
The returned `::location` will be namespaced unless `namespaced` is set to
false.
@see https://developer.mozilla.org/en-US/docs/Web/API/Location
@see https://html.spec.whatwg.org/multipage/history.html#the-location-interface
@see https://www.w3.org/TR/html52/browsers.html#the-location-interface
@throws when unable to parse the provided `url`
@throws when unable to generate a valid `::location`"
[url & {:keys [namespaced]
:or {namespaced true}}]
;; throw when url does not conform
(when (not (s/valid? ::url url))
(throw (ex-info "Invalid URL"
{:url url
:regex url-regex
:reason "Could not parse the provided URL with the URL regex"
:spec (s/explain-str ::url url)})))
;; destructure the regex into location map
;; can alternatively done via destructuring: [_ protocol _ username _ password domains-with-dot-suffix _ tld _ port _ pathname-no-slash-prefix search _ hash _] url-match
(let [url-match (re-matches url-regex url) ; will always pass as `url` is a valid `::url`
protocol (url-match 1)
username (url-match 3)
password (url-match 5)
domains-with-dot-suffix (url-match 6)
tld (url-match 8)
port (url-match 10)
pathname-no-slash-prefix (url-match 12)
search (url-match 13)
hash (url-match 15)
;; compose the ::location requirements
hostname (str domains-with-dot-suffix tld)
host (str hostname (when port (str ":" port)))
origin (str protocol "//" host)
pathname (str "/" pathname-no-slash-prefix)
auth (when username (str username (when password (str ":" password)) "@"))
href (str protocol "//" auth host pathname search hash)
location {::hash hash
::host host
::hostname hostname
::href href
::origin origin
::password password
::pathname pathname
::port port
::protocol protocol
::search search
::username username}]
;; throw if the generated location is not valid
(when (not (s/valid? ::location location))
(throw (ex-info "Generated location is not valid"
{:location location
:spec (s/explain-str ::location location)})))
;; return namespaced map if `namespaced` is true
(if namespaced
location
;; return un-namespaced ::location
(zipmap (map (comp keyword name) (keys location))
(vals location)))))
src/com/evanlouie/net/url_test.cljc
(ns com.evanlouie.net.url-test
(:require [com.evanlouie.net.url :as sut]
[clojure.spec.alpha :as s]
#?(:clj [clojure.test :as t]
:cljs [cljs.test :as t :include-macros true])))
(def ^:private equality-tests
"List of equality tests to execute where `:name` is the name of test, `:actual`
is actual value, and `:expected` is the expected value."
[{:name "basic url"
:actual (sut/parse "http://evanlouie.com")
:expected #:com.evanlouie.net.url
{:hash nil
:host "evanlouie.com"
:hostname "evanlouie.com"
:href "http://evanlouie.com/"
:origin "http://evanlouie.com"
:password nil
:pathname "/"
:port nil
:protocol "http:"
:search nil
:username nil}}
{:name "complex url"
:actual (sut/parse "https://evan:my-password@some.sub.domain.evanlouie.com:1337/path/to/page?query1=foo&query2=bar#header2")
:expected #:com.evanlouie.net.url
{:hash "#header2"
:host "some.sub.domain.evanlouie.com:1337"
:hostname "some.sub.domain.evanlouie.com"
:href "https://evan:my-password@some.sub.domain.evanlouie.com:1337/path/to/page?query1=foo&query2=bar#header2"
:origin "https://some.sub.domain.evanlouie.com:1337"
:password "my-password"
:pathname "/path/to/page"
:port "1337"
:protocol "https:"
:search "?query1=foo&query2=bar"
:username "evan"}}
{:name "auth with username but no password"
:actual (sut/parse "https://evan@some.sub.domain.evanlouie.com:1337/path/to/page?query1=foo&query2=bar#header2")
:expected #:com.evanlouie.net.url
{:hash "#header2"
:host "some.sub.domain.evanlouie.com:1337"
:hostname "some.sub.domain.evanlouie.com"
:href "https://evan@some.sub.domain.evanlouie.com:1337/path/to/page?query1=foo&query2=bar#header2"
:origin "https://some.sub.domain.evanlouie.com:1337"
:password nil
:pathname "/path/to/page"
:port "1337"
:protocol "https:"
:search "?query1=foo&query2=bar"
:username "evan"}}
{:name "basic url -- no namespace"
:actual (sut/parse "http://evanlouie.com"
:namespaced false)
:expected {:hash nil
:host "evanlouie.com"
:hostname "evanlouie.com"
:href "http://evanlouie.com/"
:origin "http://evanlouie.com"
:password nil
:pathname "/"
:port nil
:protocol "http:"
:search nil
:username nil}}
{:name "complex url -- no namespace"
:actual (sut/parse "https://evan:my-password@some.sub.domain.evanlouie.com:1337/path/to/page?query1=foo&query2=bar#header2"
:namespaced false)
:expected {:hash "#header2"
:host "some.sub.domain.evanlouie.com:1337"
:hostname "some.sub.domain.evanlouie.com"
:href "https://evan:my-password@some.sub.domain.evanlouie.com:1337/path/to/page?query1=foo&query2=bar#header2"
:origin "https://some.sub.domain.evanlouie.com:1337"
:password "my-password"
:pathname "/path/to/page"
:port "1337"
:protocol "https:"
:search "?query1=foo&query2=bar"
:username "evan"}}
{:name "auth with username but no password -- no namespace"
:actual (sut/parse "https://evan@some.sub.domain.evanlouie.com:1337/path/to/page?query1=foo&query2=bar#header2"
:namespaced false)
:expected {:hash "#header2"
:host "some.sub.domain.evanlouie.com:1337"
:hostname "some.sub.domain.evanlouie.com"
:href "https://evan@some.sub.domain.evanlouie.com:1337/path/to/page?query1=foo&query2=bar#header2"
:origin "https://some.sub.domain.evanlouie.com:1337"
:password nil
:pathname "/path/to/page"
:port "1337"
:protocol "https:"
:search "?query1=foo&query2=bar"
:username "evan"}}])
(t/deftest test-parse
;; run the equality tests
(doseq [{:keys [name expected actual]} equality-tests]
(t/testing name
(t/is (= expected actual))))
(t/testing "invalid URL throws"
(let [exception-pattern (if (s/check-asserts?)
#"(?s)^Spec assertion failed.+$"
#"^Invalid URL$")]
(t/is (thrown-with-msg? Exception exception-pattern
(sut/parse "some-illegal-url"))))))