Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix validation of Using items with generics and escaping #255

Merged
merged 3 commits into from
Sep 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions MonoDevelop.MSBuild.Tests/Analyzers/CoreDiagnosticTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -656,5 +656,33 @@ public void SdkNameExpressionWithProperty ()
ignoreDiagnostics: [CoreDiagnostics.UnwrittenProperty]
);
}

[Test]
public void InvalidCSharpType ()
{
var source = TextWithMarkers.Parse (@"<Project>
<PropertyGroup>
<SomeType>|Foo-Bar|</SomeType>
<SomeType>System.Collections.Generic.Dictionary%3CMicrosoft.NET.Sdk.WorkloadManifestReader.WorkloadId, Microsoft.NET.Sdk.WorkloadManifestReader.WorkloadDefinition%3E</SomeType>
</PropertyGroup>
</Project>", '|');

var schema = new MSBuildSchema {
new PropertyInfo ("SomeType", "", MSBuildValueKind.CSharpType)
};

var spans = source.GetMarkedSpans ('|');

var expected = new MSBuildDiagnostic (
CoreDiagnostics.InvalidCSharpType,
spans[0],
messageArgs: [ "Foo-Bar" ]
);

VerifyDiagnostics (source.Text, out _,
includeCoreDiagnostics: true,
expectedDiagnostics: [ expected ],
schema: schema);
}
}
}
9 changes: 8 additions & 1 deletion MonoDevelop.MSBuild/Language/CoreDiagnostics.cs
Original file line number Diff line number Diff line change
Expand Up @@ -511,10 +511,17 @@ class CoreDiagnostics
public const string InvalidClrType_Id = nameof (InvalidClrType);
public static readonly MSBuildDiagnosticDescriptor InvalidClrType = new (
InvalidClrType_Id,
"Invalid qualified .NET type name",
"Invalid .NET type name",
"The value `{0}` is not a valid qualified .NET type name string",
MSBuildDiagnosticSeverity.Error);

public const string InvalidCSharpType_Id = nameof (InvalidCSharpType);
public static readonly MSBuildDiagnosticDescriptor InvalidCSharpType = new (
InvalidCSharpType_Id,
"Invalid C# type name",
"The value `{0}` is not a valid qualified C# type name string",
MSBuildDiagnosticSeverity.Error);

public const string InvalidClrTypeName_Id = nameof (InvalidClrTypeName);
public static readonly MSBuildDiagnosticDescriptor InvalidClrTypeName = new (
InvalidClrTypeName_Id,
Expand Down
18 changes: 14 additions & 4 deletions MonoDevelop.MSBuild/Language/Expressions/ExpressionText.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,15 @@ namespace MonoDevelop.MSBuild.Language.Expressions
[DebuggerDisplay ("{Value} (IsPure: {IsPure})")]
public class ExpressionText : ExpressionNode
{
/// <summary>
/// The raw value of this text, including any whitespace and XML entities and MSBuild escape sequences.
/// </summary>
// TODO: audit access to this and make sure they should not be calling GetUnescapedValue instead
public string Value { get; }

/// <summary>
/// Gets the unescaped value of this text, optionally trimming whitespace.
/// Gets the unescaped value of this text, optionally trimming whitespace. This unescapes
/// both XML entities and MSBuild %XX escape sequences.
/// </summary>
/// <param name="trim">Whether to trim leading and trailing whitespace.</param>
/// <param name="trimmedOffset">The offset of the text, taking optional trimming into account.</param>
Expand Down Expand Up @@ -41,17 +46,22 @@ public string GetUnescapedValue (bool trim, out int trimmedOffset, out int escap
trimmedOffset = start + Offset;
}

return XmlEscaping.UnescapeEntities (value);
// TODO: technically there could be escaped whitespace, should we trim it too?
var xmlUnescaped = XmlEscaping.UnescapeEntities (value);
var msbuildUnescaped = Util.MSBuildEscaping.UnescapeString (xmlUnescaped);

return msbuildUnescaped;
}

/// <summary>
/// Indicates whether this exists by itself, as opposed to being concatenated with other values.
/// </summary>
public bool IsPure { get; }

public ExpressionText (int offset, string value, bool isPure) : base (offset, value.Length)
// TODO: add ctor that takes unescaped value and audit callers
public ExpressionText (int offset, string rawValue, bool isPure) : base (offset, rawValue.Length)
{
Value = value;
Value = rawValue;
IsPure = isPure;
}

Expand Down
24 changes: 9 additions & 15 deletions MonoDevelop.MSBuild/Language/MSBuildDocumentValidator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -857,22 +857,28 @@ void VisitPureLiteral (MSBuildElementSyntax elementSyntax, MSBuildAttributeSynta
break;
}
case MSBuildValueKind.ClrNamespace:
if (!IsValidTypeOrNamespace (value, out _)) {
if (!TypeNameValidation.IsValidClrNamespace (value)) {
AddErrorWithArgs (CoreDiagnostics.InvalidClrNamespace, value);
}
break;

case MSBuildValueKind.ClrType:
if (!IsValidTypeOrNamespace (value, out _)) {
if (!TypeNameValidation.IsValidClrTypeOrNamespace (value, out _, out _)) {
AddErrorWithArgs (CoreDiagnostics.InvalidClrType, value);
}
break;

case MSBuildValueKind.ClrTypeName:
if (!(IsValidTypeOrNamespace (value, out int componentCount) && componentCount == 1)) {
if (!TypeNameValidation.IsValidClrTypeName (value)) {
AddErrorWithArgs (CoreDiagnostics.InvalidClrTypeName, value);
}
break;

case MSBuildValueKind.CSharpType:
if (!TypeNameValidation.IsValidCSharpTypeOrNamespace (value)) {
AddErrorWithArgs (CoreDiagnostics.InvalidCSharpType, value);
}
break;
}

void AddErrorWithArgs (MSBuildDiagnosticDescriptor d, params object[] args) => Diagnostics.Add (d, new TextSpan (trimmedOffset, escapedLength), args);
Expand All @@ -889,18 +895,6 @@ void AddMisspelledValueError (MSBuildDiagnosticDescriptor d, params object[] arg
args
);
}

static bool IsValidTypeOrNamespace (string value, out int componentCount)
{
string[] components = value.Split ('.');
componentCount = components.Length;
foreach (var component in components) {
if (!System.CodeDom.Compiler.CodeGenerator.IsValidLanguageIndependentIdentifier (component)) {
return false;
}
}
return true;
}
}

void CheckPropertyWrite (PropertyInfo resolvedProperty, TextSpan span)
Expand Down
230 changes: 230 additions & 0 deletions MonoDevelop.MSBuild/Language/TypeNameValidation.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,230 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

#if NETCOREAPP
#nullable enable
#endif

using System.Globalization;

namespace MonoDevelop.MSBuild.Language;

static class TypeNameValidation
{
public static bool IsValidCSharpTypeOrNamespace (string value) => IsValidCSharpTypeOrNamespace(value, out _, out _);

public static bool IsValidCSharpTypeOrNamespace (string value, out int componentCount, out bool isGenericType)
{
int offset = 0;
if (!ConsumeCSharpGenericTypeOrNamespace(value, ref offset, out componentCount, out isGenericType)) {
return false;
}
return offset == value.Length;
}


static bool ConsumeCSharpGenericTypeOrNamespace (string value, ref int offset, out int componentCount, out bool isGenericType)
{
isGenericType = false;

componentCount = ConsumeIdentifiers(value, ref offset);

if (componentCount == 0) {
return false;
}

if (offset == value.Length) {
return true;
}

// try to consume generics in the form SomeType<SomeOtherType,AnotherType>
if (value[offset] != '<') {
// complete but not a generic type, we are done consuming, caller decides what to do with next char
return true;
}
offset++;

isGenericType = true;
do {
// Ignore whitespace after a comma as that's pretty common e.g. SomeType<SomeOtherType, AnotherType>
// and is valid if this is going to be injected verbatim into a C# file.
// We could be more strict and disallow whitespace, but that will likely trip people up.
// We could also be more liberal and allow general whitespace around the angle brackets and commas,
// but that's a bit of a rabbit hole and not likely to be very useful.
if (value[offset] == ',') {
offset++;
ConsumeSpaces(value, ref offset);
}
if (!ConsumeCSharpGenericTypeOrNamespace(value, ref offset, out int consumedComponents, out _)) {
return false;
}
} while (value[offset] == ',');

if (offset < value.Length && value[offset] == '>') {
offset++;
return true;
}

return false;
}

static void ConsumeSpaces(string value, ref int offset)
{
while (value[offset] == ' ') {
offset++;
}
}

public static bool IsValidClrNamespace (string value) => IsValidClrTypeOrNamespace(value, out _, out var hasGenerics) && !hasGenerics;

public static bool IsValidClrTypeName (string value) => IsValidClrTypeOrNamespace(value, out var componentCount, out var hasGenerics) && componentCount == 1 && !hasGenerics;

public static bool IsValidClrTypeOrNamespace (string value) => IsValidClrTypeOrNamespace(value, out _, out _);

public static bool IsValidClrTypeOrNamespace (string value, out int componentCount, out bool isGenericType)
{
int offset = 0;
if (!ConsumeClrGenericTypeOrNamespace(value, ref offset, out componentCount, out isGenericType)) {
return false;
}
return offset == value.Length;
}

static bool ConsumeClrGenericTypeOrNamespace (string value, ref int offset, out int componentCount, out bool isGenericType)
{
isGenericType = false;

componentCount = ConsumeIdentifiers(value, ref offset);

if (componentCount == 0) {
return false;
}

if (offset == value.Length) {
return true;
}

// try to consume generics in the form SomeType`2[SomeOtherType,AnotherType]
if (value[offset] != '`') {
// complete but not a generic type, we are done consuming, caller decides what to do with next char
return true;
}
offset++;

int argCountOffset = offset;
if(!ConsumeNumbers(value, ref offset) || !int.TryParse(value.Substring(argCountOffset, offset - argCountOffset), out int genericArgCount)) {
return false;
}

isGenericType = true;
if (value[offset++] != '[') {
return false;
}

for (int i = 0; i < genericArgCount; i++) {
// recursively consume the generic type arguments
if (!ConsumeClrGenericTypeOrNamespace(value, ref offset, out int consumedComponents, out _)) {
return false;
}
if (i < genericArgCount - 1) {
if (offset < value.Length && value[offset] == ',') {
offset++;
} else {
return false;
}
}
}

if (offset < value.Length && value[offset] == ']') {
offset++;
return true;
}

return false;
}

static bool ConsumeNumbers (string value, ref int offset)
{
int start = offset;
while(char.IsDigit(value[offset])) {
offset++;
}
return offset > start;
}

static int ConsumeIdentifiers (string value, ref int offset)
{
int componentCount = 0;

while(ConsumeIdentifier(value, ref offset)) {
componentCount++;
if (offset >= value.Length || value[offset] != '.') {
return componentCount;
}
offset++;
}

if (componentCount > 0 && value[offset - 1] == '.') {
// we consumed a dot but no identifier followed it, so walk it back
offset--;
}

return componentCount;
}

static bool ConsumeIdentifier (string value, ref int offset)
{
bool nextMustBeStartChar = true;

while (offset < value.Length) {
char ch = value[offset];
// based on https://github.com/dotnet/runtime/blob/96be510135829ff199c6c341ad461c36bab4809e/src/libraries/Common/src/System/CSharpHelpers.cs#L141
UnicodeCategory uc = CharUnicodeInfo.GetUnicodeCategory(ch);
switch (uc) {
case UnicodeCategory.UppercaseLetter: // Lu
case UnicodeCategory.LowercaseLetter: // Ll
case UnicodeCategory.TitlecaseLetter: // Lt
case UnicodeCategory.ModifierLetter: // Lm
case UnicodeCategory.LetterNumber: // Lm
case UnicodeCategory.OtherLetter: // Lo
nextMustBeStartChar = false;
offset++;
continue;

case UnicodeCategory.NonSpacingMark: // Mn
case UnicodeCategory.SpacingCombiningMark: // Mc
case UnicodeCategory.ConnectorPunctuation: // Pc
case UnicodeCategory.DecimalDigitNumber: // Nd
// Underscore is a valid starting character, even though it is a ConnectorPunctuation.
if (nextMustBeStartChar && ch != '_')
return false;
offset++;
continue;

default:
// unlike CSharpHelpers.cs, don't check IsSpecialTypeChar here because we're only looking
// for identifier components that are valid for all languages, and generic syntax is
// handled separately.
//
break;
}
break;
}

// return true if we have been able to consume a valid identifier. it need
// not be the entire string.
return !nextMustBeStartChar;
}

public static bool IsValidCSharpType (string value, out int componentCount)
{
string[] components = value.Split ('.');
componentCount = components.Length;
foreach (var component in components) {
if (!System.CodeDom.Compiler.CodeGenerator.IsValidLanguageIndependentIdentifier (component)) {
return false;
}
}
return true;
}
}
Loading
Loading