From a8a49bc75cac93d6784334281810e6e8dcf6531a Mon Sep 17 00:00:00 2001 From: Michael Becker Date: Sat, 6 Aug 2022 18:57:02 -0400 Subject: [PATCH] improve new line detection and handling --- .../IO/NewLineSequence.cs | 12 ++++ Libraries/UniversalEditor.Core/IO/Reader.cs | 65 ++++++++++++++++--- .../IO/ReaderWriterBase.cs | 18 +++-- 3 files changed, 81 insertions(+), 14 deletions(-) diff --git a/Libraries/UniversalEditor.Core/IO/NewLineSequence.cs b/Libraries/UniversalEditor.Core/IO/NewLineSequence.cs index 4d79a621..dbc7cb3b 100644 --- a/Libraries/UniversalEditor.Core/IO/NewLineSequence.cs +++ b/Libraries/UniversalEditor.Core/IO/NewLineSequence.cs @@ -23,6 +23,18 @@ namespace UniversalEditor.IO { public enum NewLineSequence { + /// + /// Determines the new line sequence based on whether there is a '\r' or a + /// '\n', or both, at the end of a line. + /// + /// + /// DO NOT USE ON BINARY FILES OR OVER NETWORK STREAMS. The detection + /// logic requires a line to end eventually with a '\r' or a '\n' + /// character. Universal Editor will keep reading until it encounters one + /// of those characters, or hits the end of the file. On a network stream, + /// this may never happen, and your program may end up waiting forever. + /// + Automatic = -2, /// /// Determines the new line sequence based on the system default (CR on Mac OS up to version 9, LF on Linux, /// CRLF on Windows). 
diff --git a/Libraries/UniversalEditor.Core/IO/Reader.cs b/Libraries/UniversalEditor.Core/IO/Reader.cs index 42ba6eab..9eb19e76 100644 --- a/Libraries/UniversalEditor.Core/IO/Reader.cs +++ b/Libraries/UniversalEditor.Core/IO/Reader.cs @@ -1075,11 +1075,7 @@ namespace UniversalEditor.IO return encoding.GetString(data); } - public byte[] ReadUntil(byte[] sequence) - { - return ReadUntil(sequence, false); - } - public byte[] ReadUntil(byte[] sequence, bool includeSequence) + public byte[] ReadUntil(byte[] sequence, bool includeSequence = false) { byte[] w = new byte[0]; while (!EndOfStream) @@ -1107,6 +1103,8 @@ namespace UniversalEditor.IO if (!includeSequence) { Array.Resize(ref w, w.Length - sequence.Length); + + // HACK: we aren't including the sequence in the response, BUT we should consume it anyway... right? try { Seek(-sequence.Length, SeekOrigin.Current); @@ -1115,6 +1113,7 @@ namespace UniversalEditor.IO { } + } return w; } @@ -1785,11 +1784,57 @@ namespace UniversalEditor.IO public string ReadLine() { StringBuilder sb = new StringBuilder(); - string line = ReadUntil(GetNewLineSequence()); - // ReadChars(GetNewLineSequence().Length); - if (line.EndsWith("\r")) - line = line.Substring(0, line.Length - 1); - return line; + if (NewLineSequence == NewLineSequence.Automatic && _ActualNewLineSequenceForAutomatic == NewLineSequence.Default) + { + // first time around, determine actual new line sequence + while (!EndOfStream) + { + char c = ReadChar(); + if (c == '\n') + { + char c2 = PeekChar(); + if (c2 == '\r') + { + ReadChar(); + _ActualNewLineSequenceForAutomatic = NewLineSequence.LineFeedCarriageReturn; + break; + } + else + { + _ActualNewLineSequenceForAutomatic = NewLineSequence.LineFeed; + break; + } + } + else if (c == '\r') + { + char c2 = PeekChar(); + if (c2 == '\n') + { + ReadChar(); + _ActualNewLineSequenceForAutomatic = NewLineSequence.CarriageReturnLineFeed; + break; + } + else + { + _ActualNewLineSequenceForAutomatic = 
NewLineSequence.CarriageReturn; + break; + } + } + else + { + sb.Append(c); + } + } + return sb.ToString(); + } + else + { + string line = ReadUntil(GetNewLineSequence()); + ReadChars(GetNewLineSequence().Length); + if (line.EndsWith("\r")) + line = line.Substring(0, line.Length - 1); + return line; + } } /// diff --git a/Libraries/UniversalEditor.Core/IO/ReaderWriterBase.cs b/Libraries/UniversalEditor.Core/IO/ReaderWriterBase.cs index d3492a83..c846e477 100644 --- a/Libraries/UniversalEditor.Core/IO/ReaderWriterBase.cs +++ b/Libraries/UniversalEditor.Core/IO/ReaderWriterBase.cs @@ -46,12 +46,18 @@ namespace UniversalEditor.IO public Transformation.TransformationCollection Transformations { get; } = new Transformation.TransformationCollection(); - private NewLineSequence mvarNewLineSequence = NewLineSequence.Default; - public NewLineSequence NewLineSequence { get { return mvarNewLineSequence; } set { mvarNewLineSequence = value; } } - public string GetNewLineSequence() + protected NewLineSequence _ActualNewLineSequenceForAutomatic = NewLineSequence.Default; + public NewLineSequence NewLineSequence { get; set; } = NewLineSequence.Automatic; + + private string GetNewLineSequence(NewLineSequence newLineSequence) { + if (newLineSequence == NewLineSequence.Automatic) + { + return GetNewLineSequence(_ActualNewLineSequenceForAutomatic); + } + string newline = System.Environment.NewLine; - switch (mvarNewLineSequence) + switch (newLineSequence) { case IO.NewLineSequence.CarriageReturn: { @@ -76,6 +82,10 @@ namespace UniversalEditor.IO } return newline; } + public string GetNewLineSequence() + { + return GetNewLineSequence(NewLineSequence); + } public ReaderWriterBase(Accessor accessor) {