#!/usr/bin/env perl
# (c) Uncle MiF, 2006. alexey@mitme.ru

require 5.8.0;
use strict;

our $esc;

############### customize options  here ##########################
# resources rel from .dsl file
my $sRel = '.';# rel path to dict sounds
my $iRel = '.';# rel path to dict images
my $vRel = '.';# rel path to dict videos
# fonts
my $cTranslation = '<font class=ctranslation>%s</font>';
my $cTranscription = '<font class=ctranscription>%s</font>';
my $cMark = '<font class=cmark>%s</font>';
my $cCommentary = '<font class=ccommentary>%s</font>';
my $cExample = '<font class=cexample>%s</font>';
my $cSecondaryContent = '<font class=csecondarycontent>%s</font>';
my $cMultimedia = '<font class=cmultimedia>%s</font>';
my $cImage = '<img src="__E__/Images/%s">';
my $cSound = '<object type="video/quicktime" data="__E__/Sounds/%s"><param name="controller" value="true" /></object>';
my $cVideo = '<a href=“__E__/Movies/%s”>%s</a>';
my $cStress = '<font class=cstress>%s</font>';
my $cFF = '<font class=cff>%s</font>';
my $cDirectColor = '<font color=%s class=cdirectcolor>%s</font>';
###################################################################

our $modules;
our $dsl;
BEGIN
{ 
	$esc = ' &"\'';
 select STDIN;
 $| = 1;
 select STDOUT;
 $| = 1;
 $modules = __FILE__;
	$modules = '.' if ($modules !~ /\//);
 $modules =~ s/\/[^\/]*$//gos;
	my $smodules = $modules;
	$smodules =~ s/([$esc])/\\$1/gos;
	$dsl = `cat $smodules/dsl.css`;
 eval "use lib qw($modules/.);";
}
use lib qw(.);
use localization::localization;
use IPATranscription;

$| = 1;

my $ipa = IPATranscription->new;

my $dname = $ARGV[0];
my $out = $ARGV[1];

my $verPath = $0;
$verPath =~ s/\/[^\/]*$//gos;
$verPath .= '/version';
$verPath =~ s/([$esc])/\\$1/gos;
my $ver = `cat $verPath`;
$ver =~ s/\s+$//gos;

print "\n";
print _L("(c) MiF, MDict, ABBYY Lingvo DSL file parser version %#.\n\n",$ver);
unless($dname)
{
	print _L("Usage: %# <dictFile> [<outDir>]\n",$0);
	print _L("\nExample: %# Fr_Ru.dsl Fr_Ru_dictDir\n\n",$0);
	print _L("\nBe aware: DSL file converted to DSLC with UTF-8 encoding first!!!\n\n",$0);
	exit(1);
}
unless(-f $dname)
{
	print _L("File not found: %#\n",$dname);
	exit(2);
}

my $lastPathComponent = $dname;
$lastPathComponent =~ s/\.dsl$//i;
$lastPathComponent =~ s/^.*\///gos;
if (!$lastPathComponent || $lastPathComponent =~ /^\.+$/)
{
	 print _L("Can't parse properly dictionary path\n");
		 exit(20);
}

$out ||= './Unpacked Dictionaries/';

my $silent = $ARGV[2];
$silent ||= 0;
$silent = ($silent =~ /^silent$/i) ? 1 : 0;

$out .= '/' if ($out !~ /\/$/);
$out .= $lastPathComponent;
my $in = $dname;
$in = '.' if ($in !~ /\//);
$in =~ s/\/[^\/]*$//gso;
$in = '.' unless ($in);
my $sdname = $dname;
$sdname =~ s/([$esc])/\\$1/gos;

my $converted = "$in/$lastPathComponent.dslc";
my $sconverted = $converted;
$sconverted =~ s/([$esc])/\\$1/gos;
if (! -f $converted)
{
	my $enctest = `head -10 $sdname`;
	my $encoding = 'utf-16';
	$encoding = 'cp1251' if ($enctest =~ /#SOURCE_CODE_PAGE\s+"Cyrillic"/i);
	print _L("Converting '%#' to '%#' %# -> utf-8\n",$dname, $converted,$encoding);
	if (system("iconv -f $encoding -t utf-8 $sdname > $sconverted"))
	{
		print _L("iconv converting failed from %# to utf-8 '%#' -> '%#'\n",$encoding,$dname,$converted);
		exit(30);
	}
}

if (! -f $converted)
{
	print _L("Can't found converted file '%#'. You must convert source DSL file to UTF-8 first.\n",$converted);
	exit(31);
}

print _L("Using converted file: '%#'\n",$converted);
print _L("Input directory: '%#'\n",$in);

print _L("Parsing '%#' to directory '%#'\n",$dname,$out);

if (-d $out || -f $out)
{
	print _L("Directory/file '%#' already exists. Remove now [N/yes]? ",$out);
	my $answer = <STDIN>;
	$answer =~ s/\s+$//gos;
	if ($answer !~ /^yes$/i)
	{
		print _L("User canceled (you must type YES to continue)...\n");
		exit(3);
	}
}

my $sout = $out;
$sout =~ s/([$esc])/\\$1/gos;
`rm -rf $sout; mkdir -p $sout`;

my $words = "$out/Words";
my $replacements = "$out/Replacements";
my $extensions = "$out/Extensions";
my $images = "$extensions/Images";
my $sounds = "$extensions/Sounds";
my $videos = "$extensions/Movies";

print _L("Words directory will be: %#\n",$words);
print _L("Replacements directory will be: %#\n",$replacements);
print _L("Extensions directory will be: %#\n",$extensions);
print _L("Sounds directory will be: %#\n",$sounds);
print _L("Images directory will be: %#\n",$images);
print _L("Movies directory will be: %#\n",$videos);

my ($swords, $sreplacements, $sextensions, $simages, $ssounds, $svideos) = 
							($words, $replacements, $extensions, $images, $sounds, $videos);

$swords =~ s/([$esc])/\\$1/gos;
$sreplacements =~ s/([$esc])/\\$1/gos;
$sextensions =~ s/([$esc])/\\$1/gos;
$simages =~ s/([$esc])/\\$1/gos;
$ssounds =~ s/([$esc])/\\$1/gos;
$svideos =~ s/([$esc])/\\$1/gos;
my $scss = "$sextensions/css";

`mkdir -p $swords; mkdir -p $sreplacements; mkdir -p $sextensions; mkdir -p $ssounds; mkdir -p $simages; mkdir -p $svideos;`;
`mkdir -p $scss; echo -e "$dsl" > $scss/dsl.css;`;

print _L("Processing dictionary file...\n");

local * DICT;

my $ok = 1;
open (DICT, "<$converted") or $ok = 0;
unless($ok)
{
	print _L("Can't open %# for reading", $dname);
	exit(4);
}

my $lnum = 0;
my $cards = 0;
my $cardWord = '';
my $cardContent = '';
my $spaces = '';
my $newspaces;
my $canNext = 1;
foreach my $line (<DICT>)
{
	$line =~ s/\r//gos;
	$lnum++;
	next if ($canNext && $line =~ /^#/);
	$canNext = 0;
	if ($line =~ /^[^\t\n ]/gos)
	{
		$cards++ if ($cardWord && $cardContent);
		parseCard() if ($cardContent);
		$cardWord = $line;
		$cardWord =~ s/\s+$//gos;
		$cardContent = '';
		$spaces = '';
	}
	else
	{
# accumulating card body
		$line =~ s/^[\t ]//gos;
		$spaces = '&nbsp;' x $1 if ($line =~ /\[m(\d)\]/osi);
		$line =~ s/\[m\d\]//gosi;
		$newspaces = $spaces;
		$newspaces = '' if ($line =~ s/\[\/m\]//gosi);
		$cardContent .= $spaces . $line;
		$spaces = $newspaces;
	}
}
# last card (on EOF)
parseCard();

sub mark
{
	my ($str, $as, $color) = @_;
	if ($as eq 'translation')
	{
		return sprintf $cTranslation, $str;
	}	
	elsif ($as eq 'transcription')
	{
		return sprintf $cTranscription, $ipa->convert($str);
	}
	elsif ($as eq 'secondary content')
	{
		return sprintf $cSecondaryContent, $str;
	}
	elsif ($as eq 'example')
	{
		return sprintf $cExample, $str;
	}
	elsif ($as eq 'commentary')
	{
		return sprintf $cCommentary, $str;
	}
	elsif ($as eq 'mark')
	{
		return sprintf $cMark, $str;
	}
	elsif ($as eq 'multimedia')
	{
		my $spath;
		if ($str =~ /\.(?:png|bmp|pcx|dcx|jpg|tiff?|gif)$/soi)
		{
			# image
			$spath = "$in/$iRel/$str";
			$spath =~ s/([$esc])/\\$1/gos;
			return '' if (system("cp $spath $simages"));
			return sprintf $cImage, $str;
		}
		elsif ($str =~ /\.(?:wav|au|mp3)$/soi)
		{
			# sound
			$spath = "$in/$sRel/$str";
			$spath =~ s/([$esc])/\\$1/gos;
			return '' if (system("cp $spath $ssounds"));
			return sprintf $cSound, $str;
		}
		elsif ($str =~ /\.(?:avi|mov)/soi)
		{
			# movie
			$spath = "$in/$vRel/$str";
			$spath =~ s/([$esc])/\\$1/gos;
			return '' if (system("cp $spath $svideos"));
			return sprintf $cVideo, $str, $str;
		}
		else
		{
			# without extension hack
			if ($str !~ /\./os)
			{
				# assuming WAV, but first of all we test converted MP3 file
				$spath = "$in/$sRel/$str.mp3";
				if (-f $spath)
				{
					return mark($str . '.mp3', 'multimedia');
				}
				$spath =~ s/mp3$/wav/og;
				if (-f $spath)
				{
					return mark($str . '.wav', 'multimedia');
				}
			}
		}
		return sprintf $cMultimedia, $str;
	}
	elsif ($as eq 'color')
	{
		return sprintf $cMark, $str;
	}
	elsif ($as eq 'direct color')
	{
		return sprintf $cDirectColor, $color, $str;
	}
	elsif ($as eq 'stress')
	{
		return sprintf $cStress, $str;
	}
	elsif ($as eq 'ff')
	{
		return sprintf $cFF, $str;
	}
	return $str;
}

sub parseCard
{
	$cardWord =~ s/^\s+//gos;
	$cardWord =~ s/\s+$//gos;
	my ($word, $article) = ($cardWord, $cardContent);
	return unless ($word ne '' && $article ne '');
	$cardWord = $word if ($word =~ s/\{\['\]\}(.+?)\{\[\/'\]\}/$1/gosi);
	$cardWord = $word if ($word =~ s/\{\[su([bp])\]\}(.*?)\{\[\/su\1\]\}/$2/gosi);
	$cardWord = $word if ($word =~ s/\[\/?[a-z*'0-9]+\]/$2/gosi);
	$cardWord = $word if ($word =~ s/(?!\\)\\(.)/$1/gos);
	while ($word =~ s/\{(.*?)\}/$1/os)
	{
		$cardWord =~ s/\{.*?\}//os;
		$cards++;# produce derived card
		parseCard();
	}
	$word =~ s/^\s+//gos;
	$word =~ s/\s+$//gos;
	$word =~ s/\\(.)/$1/gos;

	{
		$article =~ s/</&lt;/gosi;
		$article =~ s/>/&gt;/gosi;

		$article =~ s/\[([biu])\](.*?)\[\/\1\]/<$1>$2<\/$1>/gosi;# b, i, u
		$article =~ s/\[\*\](.+?)\[\/\*\]/mark($1,'secondary content')/gosei;# always turn secodary content to on
		$article =~ s/\[ex\](.+?)\[\/ex\]/mark($1,'example')/gosei;# example
		$article =~ s/\[trn\](.+?)\[\/trn\]/mark($1,'translation')/gosei;# translation zone
		$article =~ s/\[t\](.+?)\[\/t\]/mark($1,'transcription')/gosei;# transcription zone
		$article =~ s/\[com\](.+?)\[\/com\]/mark($1,'commentary')/gosei;# commentary
		$article =~ s/\[ff\](.+?)\[\/ff\]/mark($1,'ff')/gosei;# ff tag
		$article =~ s/\[p\](.+?)\[\/p\]/mark($1,'mark')/gosei;# mark
		$article =~ s/\[!trs\](.*?)\[\/!trs\]/$1/gosi;# stay trs on place
		$article =~ s/\[lang[^\]]*\](.*?)\[\/lang\]/$1/gosi;# stay lang on place
		$article =~ s/\[(ref|url)\](.*?)\[\/\1\]/[:[$2]:]/gosi;# REF | URL
		$article =~ s/\[ref\s+dict[^\]]+\](.*?)\[\/ref\]/[:[$1]:]/gosi;# REF to other dict
		$article =~ s/<<([^>]+)>>/[:[$1]:]/gosi;# REF to current dict
		$article =~ s/\[s\](.+?)\[\/s\]/mark($1,'multimedia')/gosei;# multimedia
		$article =~ s/\[c\](.+?)\[\/c\]/mark($1,'color')/gosei;# color as mark
		$article =~ s/\[c\s+([^\]]+)\](.+?)\[\/c\]/mark($2,'direct color',$1)/gosei;# color as specified
		$article =~ s/\['\](.+?)\[\/'\]/mark($1,'stress')/gosei;# stress
		$article =~ s/\[su([bp])\](.*?)\[\/su\1\]/<su$1>$2<\/su$1>/gosi;# sub/sup
		$article =~ s/(?!\\)\@/<p>/gos;# paragraph
		$article =~ s/\\(.)/$1/gos;
		$article = "<link type=text/css rel=stylesheet href=\"__E__/css/dsl.css\" />$article";

		local * ART;
		my $ok = 1;
 	open (ART, ">$words/$word-$lnum.html") or $ok = 0;
		if ($ok)
		{
			print ART $article;
			close(ART);
		}
		else
		{
			$ok = 1;
			open (ART, ">$replacements/_REPLACEMENT_$lnum.html") or $ok = 0;
			if ($ok)
			{
				print _L("Using replacement file for word '%#'\n",$word);
				print ART $word;
				close(ART);
				$ok = 1;
				open (ART, ">$words/_REPLACEMENT_$lnum.html") or $ok = 0;
				if ($ok)
				{
					print ART $article;
					close(ART);
				}
				else
				{
					print L("Failed: Something wrong with word '%#' and replacement _REPLACEMENT_%#\n", $word, $lnum);
				}
			}
			else
			{
				print _L("Can't write article to file '%#'\n",$word);
			}
		}
	}
}

close(DICT);

print _L("Total words processed: %#\n",$cards);

my $iCnt = `ls $simages | wc -l`;
my $sCnt = `ls $ssounds | wc -l`;
my $mCnt = `ls $svideos | wc -l`;
my $cCnt = `ls $scss | wc -l`;
$iCnt =~ s/\D//gs; 
$sCnt =~ s/\D//gs; 
$mCnt =~ s/\D//gs; 
$cCnt =~ s/\D//gs; 
print _L("Images: %#, Sounds: %#, Movies: %#, Styles: %#\n",$iCnt,$sCnt,$mCnt,$cCnt);
if (!$iCnt && !$sCnt && !$mCnt && !$cCnt)
{
	print _L("No multimedia files found. Removing Extensions directory.\n");
	system("rm -rf $sextensions");
}
else
{
	system("ln -s ../Extensions $swords/__E__");
}

print _L("Done\n");
system("open $sout") unless ($silent);
