Ok, here is a patch for that problem.
First, UTF-8 files (especially on Windows) may start with a BOM, which
the "crappy heuristic" did not account for. I added a specific check for
the BOM magic bytes, since their only purpose is to say "this is UTF-8 text".
Second, utf8::decode shouldn't be there, since it will cause double
encoding. The blob is raw bytes at that point, and it should be sent
as-is.
diff --git a/lib/Gitalist/Controller/Ref.pm
b/lib/Gitalist/Controller/Ref.pm
index 46def57..0f5f31d 100644
--- a/lib/Gitalist/Controller/Ref.pm
+++ b/lib/Gitalist/Controller/Ref.pm
@@ -30,8 +30,7 @@ sub raw : Chained('find') Does('FilenameArgs') Args()
{
|| File::Type->new->mime_type($c->stash->{blob})
);
}
-
- utf8::decode($c->stash->{blob});
+
$c->response->body(delete $c->stash->{blob});
}
diff --git a/lib/Gitalist/Utils.pm b/lib/Gitalist/Utils.pm
index f11dc34..a9879ea 100644
--- a/lib/Gitalist/Utils.pm
+++ b/lib/Gitalist/Utils.pm
@@ -46,7 +46,13 @@ sub age_string {
}
sub is_binary {
- # Crappy heuristic - does the first line or so look printable?
+ # Crappy heuristic - does the first line or so look printable?
+ { # BOM for UTF-8
+ use bytes;
+ if (substr($_[0],0,3) eq "\xef\xbb\xbf") {
+ return 0;
+ }
+ }
return $_[0] !~ /^[[:print:]]+$ (?: \s ^[[:print:]]+$ )?/mx;
}