#!/usr/bin/ruby -w
# encoding: UTF-8
# == Synopsis
#
# canonical_to_plain_html: generates HTML from canonical plain text (HTML suitable for sending)
#
# == Usage
#
# canonical_to_plain_html [--no-links] [FILE]
#
# --no-links: Do not wrap recognized URLs in <a> tags
# FILE: The canonical document to read (defaults to stdin)

require 'cgi'

no_links = ARGV.include?('--no-links')
ARGV.delete('--no-links')

io = ARGF

is_old_ruby = RUBY_VERSION.split('.')[0] == "1"

$link_regexp = 0
if is_old_ruby
   # This can be dropped when going 10.9+ (verify that first)
   $link_regexp = Regexp.new('(((https?|mid|cid|message|s?ftp|ftps|file|smb|afp|nfs|(x-)?man(-page)?|gopher):\/\/|mailto:)[-:@!a-z0-9_.,~%*+\/?=&()#;]*[@a-z0-9_~%+\/=&(])')
else
   # Works better with UTF-8
   $link_regexp = Regexp.new('(((https?|mid|cid|message|s?ftp|ftps|file|smb|afp|nfs|(x-)?man(-page)?|gopher):\/\/|mailto:)[-:@![:word:].,~%*+\/?=&()#;]*[@[:word:]~%+\/=&(])')
end

def prepare_linkify (text)
  # Some links are tricky to handle, for example <https://freron.com>, since escapeHTML is called before linkify.
  # We handle this by marking links without using HTML before calling escapeHTML.
  return text.gsub($link_regexp, '93C67E4B9D2B44E7\1FC756B349A13DC5C')
end

def linkify (text)
  return text.gsub(/93C67E4B9D2B44E7(.*?)FC756B349A13DC5C/, '<a href="\1">\1</a>')
end

block_prefix = "<blockquote>\n"
block_suffix = "</blockquote>\n"

old_quote_level = 0
new_quote_level = 0
empty_line_found = false
paragraph_open = false
while line = io.gets
  new_quote_level = line.match('>*')[0].size
  diff = new_quote_level - old_quote_level

  if paragraph_open and diff != 0 then
    puts '</p>'
    paragraph_open = false
  end

  (1..diff).each {|i| print block_prefix} if diff > 0
  (1..-diff).each {|i| print block_suffix} if diff < 0

  # Note: An alternative to the 'paragraph_open/br' solution is to use "white-space:pre-wrap;". This would be much nicer, but I'm not sure it'll work well in general (for receiving email clients).
  # The current solution should be augmented with the use of &nbsp; to allow extra whitespace.

  # In the canonical format, the character '>' can be space-stuffed.
  # If space-stuffed, we should skip a single space.
  # If space after quotes, we should skip a single space.
  skip_space = (line.match('^ +>') or line.match('^>+ ')) ? 1 : 0
  if line.size > new_quote_level+skip_space+1
    if not paragraph_open then
      print '<p dir="auto">'
      paragraph_open = true
    else
      puts "<br/>"
    end
    if no_links
      print CGI::escapeHTML(line[new_quote_level + skip_space..-1])
    else
      print linkify(CGI::escapeHTML(prepare_linkify(line[new_quote_level + skip_space..-1])))
    end
    empty_line_found = false
  else
    if paragraph_open then
      puts '</p>'
      paragraph_open = false
    end
    puts empty_line_found ? "<br/>" : ""
    empty_line_found = true
  end
  old_quote_level = new_quote_level;
end

if paragraph_open then
  puts '</p>'
end

(1..new_quote_level).each {|i| print block_suffix}
