From a2ff48798d2ccd97f51a748b79a64d97a984c996 Mon Sep 17 00:00:00 2001 From: Mikayla Hutchinson Date: Thu, 26 Sep 2024 20:28:12 -0400 Subject: [PATCH 1/3] Remove duplicate MSBuild escaping implementation --- MonoDevelop.MSBuild/Util/MSBuildEscaping.cs | 64 ++------------------- 1 file changed, 4 insertions(+), 60 deletions(-) diff --git a/MonoDevelop.MSBuild/Util/MSBuildEscaping.cs b/MonoDevelop.MSBuild/Util/MSBuildEscaping.cs index ac06cbe6..cbb3b5ea 100644 --- a/MonoDevelop.MSBuild/Util/MSBuildEscaping.cs +++ b/MonoDevelop.MSBuild/Util/MSBuildEscaping.cs @@ -30,8 +30,6 @@ #endif using System; -using System.Collections.Generic; -using System.Globalization; using System.IO; //imported from MonoDevelop.Projects.MSBuild.MSBuildProjectService @@ -39,41 +37,8 @@ namespace MonoDevelop.MSBuild.Util { static class MSBuildEscaping { - static readonly char[] specialCharacters = { '%', '$', '@', '(', ')', '\'', ';', '?' }; - static readonly Dictionary specialCharactersEscaped; - static readonly Dictionary specialCharactersUnescaped; - - static MSBuildEscaping () - { - specialCharactersEscaped = new Dictionary (specialCharacters.Length); - specialCharactersUnescaped = new Dictionary (specialCharacters.Length); - for (int i = 0; i < specialCharacters.Length; ++i) { - var escaped = ((int)specialCharacters[i]).ToString ("X"); - specialCharactersEscaped[specialCharacters[i]] = '%' + escaped; - specialCharactersUnescaped[escaped] = specialCharacters[i]; - } - } - public static string EscapeString (string str) - { - int i = str.IndexOfAny (specialCharacters); - if (i != -1) { - var sb = new System.Text.StringBuilder (); - int start = 0; - while (i != -1) { - sb.Append (str, start, i - start); - sb.Append (specialCharactersEscaped[str[i]]); - if (i >= str.Length) - break; - start = i + 1; - i = str.IndexOfAny (specialCharacters, start); - } - if (start < str.Length) - sb.Append (str, start, str.Length - start); - return sb.ToString (); - } - return str; - } + => Microsoft.Build.Shared.EscapingUtilities.Escape (str); public static string UnescapePath (string path) { @@ -83,32 +48,11 @@ public static string UnescapePath (string path) if (!Platform.IsWindows) path = path.Replace ("\\", "/"); - return UnscapeString (path); + return UnescapeString (path); } - public static string UnscapeString (string str) - { - int i = str.IndexOf ('%'); - if (i != -1) { - var sb = new System.Text.StringBuilder (); - int start = 0; - while (i != -1) { - var sub = str.Substring (i + 1, 2); - if (specialCharactersUnescaped.TryGetValue (sub, out char ch)) { - sb.Append (str, start, i - start); - sb.Append (ch); - } else if (int.TryParse (sub, NumberStyles.HexNumber, null, out int c)) { - sb.Append (str, start, i - start); - sb.Append ((char)c); - } - start = i + 3; - i = str.IndexOf ('%', start); - } - sb.Append (str, start, str.Length - start); - return sb.ToString (); - } - return str; - } + public static string UnescapeString (string str) + => Microsoft.Build.Shared.EscapingUtilities.UnescapeAll (str); public static string ToMSBuildPath (string absPath, string? baseDirectory = null, bool normalize = true) { From eddf52831e91770be695954b4ff3ae2cb48098d9 Mon Sep 17 00:00:00 2001 From: Mikayla Hutchinson Date: Thu, 26 Sep 2024 22:44:43 -0400 Subject: [PATCH 2/3] Unescape MSBuild escape sequences in ExpressionText values --- .../Language/Expressions/ExpressionText.cs | 18 ++++++++++++++---- MonoDevelop.MSBuild/Util/XmlEscaping.cs | 1 + 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/MonoDevelop.MSBuild/Language/Expressions/ExpressionText.cs b/MonoDevelop.MSBuild/Language/Expressions/ExpressionText.cs index 543b4e55..f0e2ec5b 100644 --- a/MonoDevelop.MSBuild/Language/Expressions/ExpressionText.cs +++ b/MonoDevelop.MSBuild/Language/Expressions/ExpressionText.cs @@ -10,10 +10,15 @@ namespace MonoDevelop.MSBuild.Language.Expressions [DebuggerDisplay ("{Value} (IsPure: {IsPure})")] public class ExpressionText : ExpressionNode { + /// + /// The raw value of this text, including any whitespace and XML entities and MSBuild escape sequences. + /// + // TODO: audit access to this and make sure they should not be calling GetUnescapedValue instead public string Value { get; } /// - /// Gets the unescaped value of this text, optionally trimming whitespace. + /// Gets the unescaped value of this text, optionally trimming whitespace. This unescapes + /// both XML entities and MSBuild %XX escape sequences. /// /// Whether to trim leading and trailing whitespace. /// The offset of the text, taking optional trimming into account. @@ -41,7 +46,11 @@ public string GetUnescapedValue (bool trim, out int trimmedOffset, out int escap trimmedOffset = start + Offset; } - return XmlEscaping.UnescapeEntities (value); + // TODO: technically there could be escaped whitespace, should we trim it too? + var xmlUnescaped = XmlEscaping.UnescapeEntities (value); + var msbuildUnescaped = Util.MSBuildEscaping.UnescapeString (xmlUnescaped); + + return msbuildUnescaped; } /// @@ -49,9 +58,10 @@ public string GetUnescapedValue (bool trim, out int trimmedOffset, out int escap /// public bool IsPure { get; } - public ExpressionText (int offset, string value, bool isPure) : base (offset, value.Length) + // TODO: add ctor that takes unescaped value and audit callers + public ExpressionText (int offset, string rawValue, bool isPure) : base (offset, rawValue.Length) { - Value = value; + Value = rawValue; IsPure = isPure; } diff --git a/MonoDevelop.MSBuild/Util/XmlEscaping.cs b/MonoDevelop.MSBuild/Util/XmlEscaping.cs index 4014ec04..23a8c54e 100644 --- a/MonoDevelop.MSBuild/Util/XmlEscaping.cs +++ b/MonoDevelop.MSBuild/Util/XmlEscaping.cs @@ -6,6 +6,7 @@ namespace MonoDevelop.MSBuild.Language { + // TODO: move this to MonoDevelop.Xml class XmlEscaping { static readonly Dictionary entities = new Dictionary { From 9720806e8fa80cd9c4cb31e12b01c63811f39a75 Mon Sep 17 00:00:00 2001 From: Mikayla Hutchinson Date: Mon, 30 Sep 2024 13:24:33 -0400 Subject: [PATCH 3/3] Add new csharp-type type and rework type name validation Fixes #253 Using Include validation is incorrect for types that have CLR generics --- .../Analyzers/CoreDiagnosticTests.cs | 28 +++ .../Language/CoreDiagnostics.cs | 9 +- .../Language/MSBuildDocumentValidator.cs | 24 +- .../Language/TypeNameValidation.cs | 230 ++++++++++++++++++ .../Language/Typesystem/MSBuildValueKind.cs | 25 ++ .../Schema/DescriptionFormatter.cs | 2 + .../Schema/MSBuildSchema.TypeInfoLoader.cs | 3 +- .../Schemas/CSharp.buildschema.json | 4 +- MonoDevelop.MSBuild/Schemas/buildschema.json | 10 +- 9 files changed, 313 insertions(+), 22 deletions(-) create mode 100644 MonoDevelop.MSBuild/Language/TypeNameValidation.cs diff --git a/MonoDevelop.MSBuild.Tests/Analyzers/CoreDiagnosticTests.cs b/MonoDevelop.MSBuild.Tests/Analyzers/CoreDiagnosticTests.cs index 689c4785..132cad9a 100644 --- a/MonoDevelop.MSBuild.Tests/Analyzers/CoreDiagnosticTests.cs +++ b/MonoDevelop.MSBuild.Tests/Analyzers/CoreDiagnosticTests.cs @@ -656,5 +656,33 @@ public void SdkNameExpressionWithProperty () ignoreDiagnostics: [CoreDiagnostics.UnwrittenProperty] ); } + + [Test] + public void InvalidCSharpType () + { + var source = TextWithMarkers.Parse (@" + + |Foo-Bar| + System.Collections.Generic.Dictionary%3CMicrosoft.NET.Sdk.WorkloadManifestReader.WorkloadId, Microsoft.NET.Sdk.WorkloadManifestReader.WorkloadDefinition%3E + +", '|'); + + var schema = new MSBuildSchema { + new PropertyInfo ("SomeType", "", MSBuildValueKind.CSharpType) + }; + + var spans = source.GetMarkedSpans ('|'); + + var expected = new MSBuildDiagnostic ( + CoreDiagnostics.InvalidCSharpType, + spans[0], + messageArgs: [ "Foo-Bar" ] + ); + + VerifyDiagnostics (source.Text, out _, + includeCoreDiagnostics: true, + expectedDiagnostics: [ expected ], + schema: schema); + } } } diff --git a/MonoDevelop.MSBuild/Language/CoreDiagnostics.cs b/MonoDevelop.MSBuild/Language/CoreDiagnostics.cs index d3ccd623..c5918bca 100644 --- a/MonoDevelop.MSBuild/Language/CoreDiagnostics.cs +++ b/MonoDevelop.MSBuild/Language/CoreDiagnostics.cs @@ -511,10 +511,17 @@ class CoreDiagnostics public const string InvalidClrType_Id = nameof (InvalidClrType); public static readonly MSBuildDiagnosticDescriptor InvalidClrType = new ( InvalidClrType_Id, - "Invalid qualified .NET type name", + "Invalid .NET type name", "The value `{0}` is not a valid qualified .NET type name string", MSBuildDiagnosticSeverity.Error); + public const string InvalidCSharpType_Id = nameof (InvalidCSharpType); + public static readonly MSBuildDiagnosticDescriptor InvalidCSharpType = new ( + InvalidCSharpType_Id, + "Invalid C# type name", + "The value `{0}` is not a valid qualified C# type name string", + MSBuildDiagnosticSeverity.Error); + public const string InvalidClrTypeName_Id = nameof (InvalidClrTypeName); public static readonly MSBuildDiagnosticDescriptor InvalidClrTypeName = new ( InvalidClrTypeName_Id, diff --git a/MonoDevelop.MSBuild/Language/MSBuildDocumentValidator.cs b/MonoDevelop.MSBuild/Language/MSBuildDocumentValidator.cs index 9b47a9b0..a8f68804 100644 --- a/MonoDevelop.MSBuild/Language/MSBuildDocumentValidator.cs +++ b/MonoDevelop.MSBuild/Language/MSBuildDocumentValidator.cs @@ -857,22 +857,28 @@ void VisitPureLiteral (MSBuildElementSyntax elementSyntax, MSBuildAttributeSynta break; } case MSBuildValueKind.ClrNamespace: - if (!IsValidTypeOrNamespace (value, out _)) { + if (!TypeNameValidation.IsValidClrNamespace (value)) { AddErrorWithArgs (CoreDiagnostics.InvalidClrNamespace, value); } break; case MSBuildValueKind.ClrType: - if (!IsValidTypeOrNamespace (value, out _)) { + if (!TypeNameValidation.IsValidClrTypeOrNamespace (value, out _, out _)) { AddErrorWithArgs (CoreDiagnostics.InvalidClrType, value); } break; case MSBuildValueKind.ClrTypeName: - if (!(IsValidTypeOrNamespace (value, out int componentCount) && componentCount == 1)) { + if (!TypeNameValidation.IsValidClrTypeName (value)) { AddErrorWithArgs (CoreDiagnostics.InvalidClrTypeName, value); } break; + + case MSBuildValueKind.CSharpType: + if (!TypeNameValidation.IsValidCSharpTypeOrNamespace (value)) { + AddErrorWithArgs (CoreDiagnostics.InvalidCSharpType, value); + } + break; } void AddErrorWithArgs (MSBuildDiagnosticDescriptor d, params object[] args) => Diagnostics.Add (d, new TextSpan (trimmedOffset, escapedLength), args); @@ -889,18 +895,6 @@ void AddMisspelledValueError (MSBuildDiagnosticDescriptor d, params object[] arg args ); } - - static bool IsValidTypeOrNamespace (string value, out int componentCount) - { - string[] components = value.Split ('.'); - componentCount = components.Length; - foreach (var component in components) { - if (!System.CodeDom.Compiler.CodeGenerator.IsValidLanguageIndependentIdentifier (component)) { - return false; - } - } - return true; - } } void CheckPropertyWrite (PropertyInfo resolvedProperty, TextSpan span) diff --git a/MonoDevelop.MSBuild/Language/TypeNameValidation.cs b/MonoDevelop.MSBuild/Language/TypeNameValidation.cs new file mode 100644 index 00000000..13a214a8 --- /dev/null +++ b/MonoDevelop.MSBuild/Language/TypeNameValidation.cs @@ -0,0 +1,230 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#if NETCOREAPP +#nullable enable +#endif + +using System.Globalization; + +namespace MonoDevelop.MSBuild.Language; + +static class TypeNameValidation +{ + public static bool IsValidCSharpTypeOrNamespace (string value) => IsValidCSharpTypeOrNamespace(value, out _, out _); + + public static bool IsValidCSharpTypeOrNamespace (string value, out int componentCount, out bool isGenericType) + { + int offset = 0; + if (!ConsumeCSharpGenericTypeOrNamespace(value, ref offset, out componentCount, out isGenericType)) { + return false; + } + return offset == value.Length; + } + + + static bool ConsumeCSharpGenericTypeOrNamespace (string value, ref int offset, out int componentCount, out bool isGenericType) + { + isGenericType = false; + + componentCount = ConsumeIdentifiers(value, ref offset); + + if (componentCount == 0) { + return false; + } + + if (offset == value.Length) { + return true; + } + + // try to consume generics in the form SomeType + if (value[offset] != '<') { + // complete but not a generic type, we are done consuming, caller decides what to do with next char + return true; + } + offset++; + + isGenericType = true; + do { + // Ignore whitespace after a comma as that's pretty common e.g. SomeType + // and is valid if this is going to be injected verbatim into a C# file. + // We could be more strict and disallow whitespace, but that will likely trip people up. + // We could also be more liberal and allow general whitespace around the angle brackets and commas, + // but that's a bit of a rabbit hole and not likely to be very useful. + if (value[offset] == ',') { + offset++; + ConsumeSpaces(value, ref offset); + } + if (!ConsumeCSharpGenericTypeOrNamespace(value, ref offset, out int consumedComponents, out _)) { + return false; + } + } while (value[offset] == ','); + + if (offset < value.Length && value[offset] == '>') { + offset++; + return true; + } + + return false; + } + + static void ConsumeSpaces(string value, ref int offset) + { + while (value[offset] == ' ') { + offset++; + } + } + + public static bool IsValidClrNamespace (string value) => IsValidClrTypeOrNamespace(value, out _, out var hasGenerics) && !hasGenerics; + + public static bool IsValidClrTypeName (string value) => IsValidClrTypeOrNamespace(value, out var componentCount, out var hasGenerics) && componentCount == 1 && !hasGenerics; + + public static bool IsValidClrTypeOrNamespace (string value) => IsValidClrTypeOrNamespace(value, out _, out _); + + public static bool IsValidClrTypeOrNamespace (string value, out int componentCount, out bool isGenericType) + { + int offset = 0; + if (!ConsumeClrGenericTypeOrNamespace(value, ref offset, out componentCount, out isGenericType)) { + return false; + } + return offset == value.Length; + } + + static bool ConsumeClrGenericTypeOrNamespace (string value, ref int offset, out int componentCount, out bool isGenericType) + { + isGenericType = false; + + componentCount = ConsumeIdentifiers(value, ref offset); + + if (componentCount == 0) { + return false; + } + + if (offset == value.Length) { + return true; + } + + // try to consume generics in the form SomeType`2[SomeOtherType,AnotherType] + if (value[offset] != '`') { + // complete but not a generic type, we are done consuming, caller decides what to do with next char + return true; + } + offset++; + + int argCountOffset = offset; + if(!ConsumeNumbers(value, ref offset) || !int.TryParse(value.Substring(argCountOffset, offset - argCountOffset), out int genericArgCount)) { + return false; + } + + isGenericType = true; + if (value[offset++] != '[') { + return false; + } + + for (int i = 0; i < genericArgCount; i++) { + // recursively consume the generic type arguments + if (!ConsumeClrGenericTypeOrNamespace(value, ref offset, out int consumedComponents, out _)) { + return false; + } + if (i < genericArgCount - 1) { + if (offset < value.Length && value[offset] == ',') { + offset++; + } else { + return false; + } + } + } + + if (offset < value.Length && value[offset] == ']') { + offset++; + return true; + } + + return false; + } + + static bool ConsumeNumbers (string value, ref int offset) + { + int start = offset; + while(char.IsDigit(value[offset])) { + offset++; + } + return offset > start; + } + + static int ConsumeIdentifiers (string value, ref int offset) + { + int componentCount = 0; + + while(ConsumeIdentifier(value, ref offset)) { + componentCount++; + if (offset >= value.Length || value[offset] != '.') { + return componentCount; + } + offset++; + } + + if (componentCount > 0 && value[offset - 1] == '.') { + // we consumed a dot but no identifier followed it, so walk it back + offset--; + } + + return componentCount; + } + + static bool ConsumeIdentifier (string value, ref int offset) + { + bool nextMustBeStartChar = true; + + while (offset < value.Length) { + char ch = value[offset]; + // based on https://github.com/dotnet/runtime/blob/96be510135829ff199c6c341ad461c36bab4809e/src/libraries/Common/src/System/CSharpHelpers.cs#L141 + UnicodeCategory uc = CharUnicodeInfo.GetUnicodeCategory(ch); + switch (uc) { + case UnicodeCategory.UppercaseLetter: // Lu + case UnicodeCategory.LowercaseLetter: // Ll + case UnicodeCategory.TitlecaseLetter: // Lt + case UnicodeCategory.ModifierLetter: // Lm + case UnicodeCategory.LetterNumber: // Lm + case UnicodeCategory.OtherLetter: // Lo + nextMustBeStartChar = false; + offset++; + continue; + + case UnicodeCategory.NonSpacingMark: // Mn + case UnicodeCategory.SpacingCombiningMark: // Mc + case UnicodeCategory.ConnectorPunctuation: // Pc + case UnicodeCategory.DecimalDigitNumber: // Nd + // Underscore is a valid starting character, even though it is a ConnectorPunctuation. + if (nextMustBeStartChar && ch != '_') + return false; + offset++; + continue; + + default: + // unlike CSharpHelpers.cs, don't check IsSpecialTypeChar here because we're only looking + // for identifier components that are valid for all languages, and generic syntax is + // handled separately. + // + break; + } + break; + } + + // return true if we have been able to consume a valid identifier. it need + // not be the entire string. + return !nextMustBeStartChar; + } + + public static bool IsValidCSharpType (string value, out int componentCount) + { + string[] components = value.Split ('.'); + componentCount = components.Length; + foreach (var component in components) { + if (!System.CodeDom.Compiler.CodeGenerator.IsValidLanguageIndependentIdentifier (component)) { + return false; + } + } + return true; + } +} diff --git a/MonoDevelop.MSBuild/Language/Typesystem/MSBuildValueKind.cs b/MonoDevelop.MSBuild/Language/Typesystem/MSBuildValueKind.cs index ac0879b3..4a74a282 100644 --- a/MonoDevelop.MSBuild/Language/Typesystem/MSBuildValueKind.cs +++ b/MonoDevelop.MSBuild/Language/Typesystem/MSBuildValueKind.cs @@ -103,10 +103,35 @@ public enum MSBuildValueKind NuGetVersion, // .NET namespace and type names + + /// + /// A .NET namespace name, in language-agnostic form. + /// ClrNamespace, + + /// + /// A .NET namespace or qualified type name, in language-agnostic form i.e. C# generic syntax is not allowed, + /// but CLR generic syntax A`1[B,C] is allowed. + /// + /// + /// If you need to support C# generic syntax, use instead. + /// ClrType, + + /// + /// A .NET unqualified type name. May not include generics of any form. This is typically used for the name of + /// a type to be generated at build time. + /// ClrTypeName, + /// + /// C# namespace or qualified type name. May include generics in C# format e.g. MyNamespace.MyType<int>. + /// + /// + /// If you need .NET language-agnostic format, use instead. + /// + CSharpType, + /// /// Base type for warning codes. Tools should use a derived custom type to define their warning codes. /// Anything of this type will receive completion and validation for all derived warning code types. diff --git a/MonoDevelop.MSBuild/Schema/DescriptionFormatter.cs b/MonoDevelop.MSBuild/Schema/DescriptionFormatter.cs index ac0023cb..e56b494b 100644 --- a/MonoDevelop.MSBuild/Schema/DescriptionFormatter.cs +++ b/MonoDevelop.MSBuild/Schema/DescriptionFormatter.cs @@ -286,6 +286,8 @@ static string FormatKind (MSBuildValueKind kind, CustomTypeInfo customTypeInfo) return "clr-type-name"; case MSBuildValueKind.WarningCode: return "warning-code"; + case MSBuildValueKind.CSharpType: + return "csharp-type"; } return null; } diff --git a/MonoDevelop.MSBuild/Schema/MSBuildSchema.TypeInfoLoader.cs b/MonoDevelop.MSBuild/Schema/MSBuildSchema.TypeInfoLoader.cs index d2283a1c..fc1e278b 100644 --- a/MonoDevelop.MSBuild/Schema/MSBuildSchema.TypeInfoLoader.cs +++ b/MonoDevelop.MSBuild/Schema/MSBuildSchema.TypeInfoLoader.cs @@ -146,6 +146,7 @@ public bool TryHandle(string key, JToken token) { "clr-namespace", MSBuildValueKind.ClrNamespace }, { "clr-type", MSBuildValueKind.ClrType }, { "clr-type-name", MSBuildValueKind.ClrTypeName }, - { "warning-code", MSBuildValueKind.WarningCode } + { "csharp-type", MSBuildValueKind.CSharpType }, + { "warning-code", MSBuildValueKind.WarningCode }, }; } diff --git a/MonoDevelop.MSBuild/Schemas/CSharp.buildschema.json b/MonoDevelop.MSBuild/Schemas/CSharp.buildschema.json index d3997355..0957e490 100644 --- a/MonoDevelop.MSBuild/Schemas/CSharp.buildschema.json +++ b/MonoDevelop.MSBuild/Schemas/CSharp.buildschema.json @@ -77,8 +77,8 @@ "items": { "Using": { "description": "A C# global using to add to the project.", - "includeDescription": "The namespace or type identifier to add, e.g. Microsoft.AspNetCore", - "type": "clr-namespace", + "includeDescription": "The namespace or type identifier to add, e.g. `Microsoft.AspNetCore`", + "type": "csharp-type", "metadata": { "Alias": { "description": "Optional alias for the namespace or type.", diff --git a/MonoDevelop.MSBuild/Schemas/buildschema.json b/MonoDevelop.MSBuild/Schemas/buildschema.json index 89505fd3..749b123e 100644 --- a/MonoDevelop.MSBuild/Schemas/buildschema.json +++ b/MonoDevelop.MSBuild/Schemas/buildschema.json @@ -458,15 +458,19 @@ }, { "const": "clr-namespace", - "description": "A .NET namespace" + "description": "A .NET namespace name, in language-agnostic form." }, { "const": "clr-type", - "description": "A fully qualified .NET type name" + "description": "A .NET namespace or qualified type name, in language-agnostic form i.e. C# generic syntax is not allowed, but CLR generic syntax ``A`1[B,C]`` is allowed. If you need to support C# generic syntax, use `csharp-type` instead." }, { "const": "clr-type-name", - "description": "An unqualified .NET type name" + "description": "A .NET unqualified type name. May not include generics of any form. This is typically used for the name of a type to be generated at build time." + }, + { + "const": "csharp-type", + "description": "C# namespace or qualified type name. May include generics in C# format e.g. `MyNamespace.MyType`. If you need .NET language-agnostic format, use `clr-type` instead." }, { "const": "warning-code",