add capability to detect byte-order marks in plain text files

This commit is contained in:
Michael Becker 2019-11-05 16:23:02 -05:00
parent 3316d05342
commit e055b0e39f
No known key found for this signature in database
GPG Key ID: 389DFF5D73781A12
3 changed files with 66 additions and 0 deletions

View File

@ -0,0 +1,31 @@
//
// ByteOrderMark.cs
//
// Author:
// Mike Becker <alcexhim@gmail.com>
//
// Copyright (c) 2019 Mike Becker
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
using System;
namespace UniversalEditor.DataFormats.Text.Plain
{
	/// <summary>
	/// Identifies which byte-order mark (BOM), if any, is associated with a
	/// plain text file.
	/// </summary>
	public enum ByteOrderMark
	{
		/// <summary>No byte-order mark.</summary>
		None = 0,

		/// <summary>The UTF-8 byte-order mark.</summary>
		UTF8 = 1,

		/// <summary>The UTF-16 little-endian byte-order mark.</summary>
		UTF16LittleEndian = 2,

		/// <summary>The UTF-16 big-endian byte-order mark.</summary>
		UTF16BigEndian = 3
	}
}

View File

@ -37,6 +37,8 @@ namespace UniversalEditor.DataFormats.Text.Plain
return _dfr;
}
/// <summary>
/// Gets or sets the byte-order mark associated with this data format.
/// Defaults to <see cref="ByteOrderMark.None" />; the load routine assigns
/// it after inspecting the leading bytes of the input.
/// </summary>
public ByteOrderMark ByteOrderMark { get; set; } = ByteOrderMark.None;
protected override void LoadInternal(ref ObjectModel objectModel)
{
PlainTextObjectModel ptom = (objectModel as PlainTextObjectModel);
@ -44,6 +46,38 @@ namespace UniversalEditor.DataFormats.Text.Plain
throw new ObjectModelNotSupportedException();
Reader reader = Accessor.Reader;
// Determine whether the input starts with a byte-order mark (BOM).
//
//   EF BB BF  -> UTF-8
//   FE FF     -> UTF-16, big-endian
//   FF FE     -> UTF-16, little-endian
//
// Reset first so a value left over from a previous load is never reported
// for input that has no BOM (including input too short to contain one).
ByteOrderMark = ByteOrderMark.None;
if (reader.Accessor.Length >= 2)
{
	// Only read as many bytes as the candidate BOM needs, so that short
	// files (2 or 3 bytes) are still recognized.
	byte b1 = reader.ReadByte();
	byte b2 = reader.ReadByte();
	int consumed = 2;

	if (b1 == 0xFE && b2 == 0xFF)
	{
		ByteOrderMark = ByteOrderMark.UTF16BigEndian;
	}
	else if (b1 == 0xFF && b2 == 0xFE)
	{
		ByteOrderMark = ByteOrderMark.UTF16LittleEndian;
	}
	else if (b1 == 0xEF && b2 == 0xBB && reader.Accessor.Length >= 3)
	{
		byte b3 = reader.ReadByte();
		consumed = 3;
		if (b3 == 0xBF)
		{
			ByteOrderMark = ByteOrderMark.UTF8;
		}
	}

	if (ByteOrderMark == ByteOrderMark.None)
	{
		// No BOM: the bytes we consumed are ordinary content, so rewind
		// and let the line reader below see them.
		reader.Accessor.Seek(-consumed, SeekOrigin.Current);
	}
	// Otherwise the position is left immediately after the BOM so it is
	// not included in the loaded text.
}
while (!reader.EndOfStream)
{
string line = reader.ReadLine();

View File

@ -188,6 +188,7 @@
<Compile Include="ObjectModels\BinaryGrammar\GrammarItems\GrammarItemNumber.cs" />
<Compile Include="ObjectModels\BinaryGrammar\GrammarItems\GrammarItemString.cs" />
<Compile Include="ObjectModels\BinaryGrammar\FixedValue.cs" />
<Compile Include="DataFormats\Text\Plain\ByteOrderMark.cs" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\UniversalEditor.Core\UniversalEditor.Core.csproj">