From c04c8ac747f9f21eedbc9113d7ee4d6562bcef47 Mon Sep 17 00:00:00 2001 From: Michael Becker Date: Mon, 11 Nov 2019 06:11:14 -0500 Subject: [PATCH] improve text file determination for shorter files and UTF-8 with BOM --- .../Libraries/UniversalEditor.UserInterface/MainWindow.cs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/CSharp/Libraries/UniversalEditor.UserInterface/MainWindow.cs b/CSharp/Libraries/UniversalEditor.UserInterface/MainWindow.cs index 15c4de9b..2ad95182 100644 --- a/CSharp/Libraries/UniversalEditor.UserInterface/MainWindow.cs +++ b/CSharp/Libraries/UniversalEditor.UserInterface/MainWindow.cs @@ -492,7 +492,7 @@ namespace UniversalEditor.UserInterface /// /// try to determine within a reasonable doubt whether or not is a "plain text" file (e.g. ASCII, UTF-8, UTF-16lE, UTF-16BE, UTF-32, etc.) /// - /// true, if text was ised, false otherwise. + /// true, if the specified file appears to be a text file, false otherwise. /// Filename. private bool isText(string filename) { @@ -500,13 +500,17 @@ namespace UniversalEditor.UserInterface return false; int len = 2048; + System.IO.FileInfo fi = new System.IO.FileInfo(filename); + len = (int)Math.Min(len, fi.Length); System.IO.FileStream fs = System.IO.File.Open(filename, System.IO.FileMode.Open, System.IO.FileAccess.Read, System.IO.FileShare.Read); byte[] b = fs.ReadBytes(0, len); string utf8 = System.Text.Encoding.UTF8.GetString(b); // yes I know this isn't the best way to do this - for (int i = 0; i < utf8.Length; i++) + bool isUTF8 = (b.Length >= 3 && b[0] == 0xEF && b[1] == 0xBB && b[2] == 0xBF); + int start = isUTF8 ? 3 : 0; + for (int i = start; i < utf8.Length; i++) { if (Char.IsControl(utf8[i]) && !Char.IsWhiteSpace(utf8[i])) {