diff options
author | Max Kanat-Alexander <mkanat@bugzilla.org> | 2011-08-09 14:04:31 -0700 |
---|---|---|
committer | Max Kanat-Alexander <mkanat@bugzilla.org> | 2011-08-09 14:04:31 -0700 |
commit | b308699b2c0453392c86215cecc4fe508a0e1762 (patch) | |
tree | 27b85bdd675e49598949bb416be46941d2a3b626 /email_in.pl | |
parent | Bug 437076: Allow email_in to accept multipart/alternative HTML email with (diff) | |
download | bugzilla-b308699b2c0453392c86215cecc4fe508a0e1762.tar.gz bugzilla-b308699b2c0453392c86215cecc4fe508a0e1762.tar.bz2 bugzilla-b308699b2c0453392c86215cecc4fe508a0e1762.zip |
Bug 660691: Allow Bugzilla to parse HTML-only inbound email via email_in.pl
r=glob, a=mkanat
Diffstat (limited to 'email_in.pl')
-rwxr-xr-x | email_in.pl | 41 |
1 files changed, 34 insertions, 7 deletions
diff --git a/email_in.pl b/email_in.pl index a835c3c9a..f16d56175 100755 --- a/email_in.pl +++ b/email_in.pl @@ -39,6 +39,7 @@ use Email::Address; use Email::Reply qw(reply); use Email::MIME; use Getopt::Long qw(:config bundling); +use HTML::FormatText::WithLinks; use Pod::Usage; use Encode; use Scalar::Util qw(blessed); @@ -68,6 +69,7 @@ use constant SIGNATURE_DELIMITER => '-- '; use constant BODY_TYPES => qw( text/plain text/html + application/xhtml+xml multipart/alternative ); @@ -321,7 +323,7 @@ sub get_body_and_attachments { # Note that this only happens if the email does not contain any # text/plain parts. If the email has an empty text/plain part, # you're fine, and this message does NOT get thrown. - ThrowUserError('email_no_text_plain'); + ThrowUserError('email_no_body'); } debug_print("Picked Body:\n$body", 2); @@ -343,18 +345,43 @@ sub get_text_alternative { } debug_print("Alternative Part Content-Type: $ct", 2); debug_print("Alternative Part Character Encoding: $charset", 2); - if (!$ct || $ct =~ /^text\/plain/i) { - $body = $part->body; - if (Bugzilla->params->{'utf8'} && !utf8::is_utf8($body)) { - $body = Encode::decode($charset, $body); - } - last; + # If we find a text/plain body here, return it immediately. + if (!$ct || $ct =~ m{^text/plain}i) { + return _decode_body($charset, $part->body); + } + # If we find a text/html body, decode it, but don't return + # it immediately, because there might be a text/plain alternative + # later. This could be any HTML type. + if ($ct =~ m{^application/xhtml\+xml}i or $ct =~ m{text/html}i) { + my $parser = HTML::FormatText::WithLinks->new( + # Put footnote indicators after the text, not before it. + before_link => '', + after_link => '[%n]', + # Convert bold and italics, use "*" for bold instead of "_". + with_emphasis => 1, + bold_marker => '*', + # If the same link appears multiple times, only create + # one footnote. + unique_links => 1, + # If the link text is the URL, don't create a footnote. 
+ skip_linked_urls => 1, + ); + $body = _decode_body($charset, $part->body); + $body = $parser->parse($body); } } return $body; } +sub _decode_body { + my ($charset, $body) = @_; + if (Bugzilla->params->{'utf8'} && !utf8::is_utf8($body)) { + return Encode::decode($charset, $body); + } + return $body; +} + sub remove_leading_blank_lines { my ($text) = @_; $text =~ s/^(\s*\n)+//s; |