Date: 2011-11-20.
I wanted to replace URLs in some plain text data with links. Thankfully Daring Fireball’s regex saves me from doing any hard work. I did hit an unexpected snag (though in retrospect it should have been obvious) - an href without a protocol will be treated as a relative path, so if a detected URL has no protocol one must be added.
;; URL-detection pattern, from John Gruber's
;; http://daringfireball.net/2010/07/improved_regex_for_matching_urls
;;
;; Change from the published pattern: a capture group was added around the
;; protocol alternative so callers can tell whether a match included it.
;;   group 1 - the whole detected URL
;;   group 2 - the leading protocol part (e.g. "http://"); nil when the match
;;             started with "www." or a bare "domain.tld/" instead
(def url-regex #"(?i)\b((?:([a-z][\w-]+:(?:/{1,3}|[a-z0-9%]))|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’]))")
(defn linkify-urls
  "Add A tags around parts of the string that look likely to be URLs.

  Every match of `url-regex` in `text` is replaced with
  <a class=\"detected-link\" href=\"URL\">URL</a>. When the match has no
  protocol (group 2 of `url-regex` is nil), \"http://\" is prepended to the
  href only, so the browser does not treat it as a relative link; the visible
  link text is left exactly as it appeared.

  Returns `text` itself, unchanged, when nothing matches.

  NOTE: neither the matched URL nor the surrounding text is HTML-escaped
  here. If `text` can come from an untrusted source, escape it before
  calling this function."
  [text]
  (let [matcher (re-matcher url-regex text)]
    (if-not (.find matcher)
      ;; No matches: skip building a StringBuffer altogether.
      text
      (let [result (StringBuffer.)]
        (loop []
          ;; $1 in the template is the matcher's group 1, i.e. the whole URL.
          (.appendReplacement
            matcher result
            (if (.group matcher 2)
              "<a class=\"detected-link\" href=\"$1\">$1</a>"
              ;; No protocol was matched, so supply one in the href.
              "<a class=\"detected-link\" href=\"http://$1\">$1</a>"))
          (if (.find matcher)
            (recur)
            ;; Copy whatever follows the last match and finish.
            (str (.appendTail matcher result))))))))