From c8e1678e556fda775ee4c729192f21ebef9d454a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Burak=20Kaan=20K=C3=B6se?= Date: Tue, 17 Feb 2026 22:12:27 +0100 Subject: [PATCH] Fix HtmlPreviewVisitor regressions and add sanitization tests (#813) --- .../Models/MailItem/HtmlPreviewVisitor.cs | 281 +++++++++++++----- .../Models/HtmlPreviewVisitorTests.cs | 76 +++++ 2 files changed, 279 insertions(+), 78 deletions(-) create mode 100644 Wino.Core.Tests/Models/HtmlPreviewVisitorTests.cs diff --git a/Wino.Core.Domain/Models/MailItem/HtmlPreviewVisitor.cs b/Wino.Core.Domain/Models/MailItem/HtmlPreviewVisitor.cs index 4a8481a4..f12cab52 100644 --- a/Wino.Core.Domain/Models/MailItem/HtmlPreviewVisitor.cs +++ b/Wino.Core.Domain/Models/MailItem/HtmlPreviewVisitor.cs @@ -13,8 +13,18 @@ namespace Wino.Core.Domain.Models.MailItem; /// public class HtmlPreviewVisitor : MimeVisitor { - List stack = new List(); - List attachments = new List(); + private static readonly HashSet BlockedTags = new(StringComparer.OrdinalIgnoreCase) + { + "script", "iframe", "frame", "frameset", "object", "embed", "applet", "base", "meta", "form" + }; + + private static readonly HashSet AllowedDataImageMimeTypes = new(StringComparer.OrdinalIgnoreCase) + { + "image/png", "image/jpeg", "image/jpg", "image/gif", "image/webp", "image/bmp", "image/x-icon", "image/avif", "image/svg+xml" + }; + + private readonly List stack = []; + private readonly List attachments = []; readonly string tempDir; @@ -89,9 +99,14 @@ public class HtmlPreviewVisitor : MimeVisitor // look up the image based on the img src url within our multipart/related stack bool TryGetImage(string url, out MimePart image) { + image = null; + + if (string.IsNullOrWhiteSpace(url)) + return false; + UriKind kind; int index; - Uri uri; + Uri uri = null; if (Uri.IsWellFormedUriString(url, UriKind.Absolute)) kind = UriKind.Absolute; @@ -106,24 +121,50 @@ public class HtmlPreviewVisitor : MimeVisitor } catch { - image = null; - return false; + // noop: we still attempt CID/content-id lookup below. } for (int i = stack.Count - 1; i >= 0; i--) { - if ((index = stack[i].IndexOf(uri)) == -1) + if (uri != null && (index = stack[i].IndexOf(uri)) != -1) + { + image = stack[i][index] as MimePart; + + if (image != null) + return true; + } + + var normalizedContentId = NormalizeContentId(url); + + if (string.IsNullOrEmpty(normalizedContentId)) continue; - image = stack[i][index] as MimePart; - return image != null; + foreach (var relatedPart in stack[i]) + { + if (relatedPart is not MimePart candidate || string.IsNullOrEmpty(candidate.ContentId)) + continue; + + if (string.Equals(candidate.ContentId.Trim('<', '>'), normalizedContentId, StringComparison.OrdinalIgnoreCase)) + { + image = candidate; + return true; + } + } } - image = null; - return false; } + private static string NormalizeContentId(string url) + { + var trimmed = url.Trim().Trim('\'', '"', '<', '>'); + + if (trimmed.StartsWith("cid:", StringComparison.OrdinalIgnoreCase)) + trimmed = trimmed[4..]; + + return trimmed.Trim('<', '>'); + } + // Save the image to our temp directory and return a "file://" url suitable for // the browser control to load. // Note: if you'd rather embed the image data into the HTML, you can construct a @@ -139,83 +180,173 @@ public class HtmlPreviewVisitor : MimeVisitor return string.Format("data:{0};base64,{1}", image.ContentType.MimeType, base64); } - - //string fileName = url - // .Replace(':', '_') - // .Replace('\\', '_') - // .Replace('/', '_'); - - //string path = Path.Combine(tempDir, fileName); - - //if (!File.Exists(path)) - //{ - // using (var output = File.Create(path)) - // image.Content.DecodeTo(output); - //} - - //return "file://" + path.Replace('\\', '/'); } - // Replaces urls that refer to images embedded within the message with - // "file://" urls that the browser control will actually be able to load. + // Replaces image references that refer to images embedded within the message with + // "data:" urls the browser control can load. Also sanitizes dangerous tags/attributes. void HtmlTagCallback(HtmlTagContext ctx, HtmlWriter htmlWriter) { - if (ctx.TagId == HtmlTagId.Image && !ctx.IsEndTag && stack.Count > 0) + var tagName = ctx.TagName; + + if (BlockedTags.Contains(tagName)) { - ctx.WriteTag(htmlWriter, false); - - // replace the src attribute with a file:// URL - foreach (var attribute in ctx.Attributes) - { - if (attribute.Id == HtmlAttributeId.Src) - { - MimePart image; - string url; - - if (!TryGetImage(attribute.Value, out image)) - { - htmlWriter.WriteAttribute(attribute); - continue; - } - - url = SaveImage(image); - - htmlWriter.WriteAttributeName(attribute.Name); - htmlWriter.WriteAttributeValue(url); - } - else - { - htmlWriter.WriteAttribute(attribute); - } - } + ctx.DeleteTag = true; + ctx.DeleteEndTag = true; + return; } - else if (ctx.TagId == HtmlTagId.Body && !ctx.IsEndTag) - { - ctx.WriteTag(htmlWriter, false); - // add and/or replace oncontextmenu="return false;" - foreach (var attribute in ctx.Attributes) + if (ctx.IsEndTag) + { + ctx.WriteTag(htmlWriter, true); + return; + } + + ctx.WriteTag(htmlWriter, false); + + foreach (var attribute in ctx.Attributes) + { + var attributeName = attribute.Name; + + if (ShouldDropAttribute(tagName, attributeName)) + continue; + + if (TryResolveImageAttribute(tagName, attributeName, attribute.Value, out var resolvedValue)) { - if (attribute.Name.ToLowerInvariant() == "oncontextmenu") + htmlWriter.WriteAttributeName(attributeName); + htmlWriter.WriteAttributeValue(resolvedValue); + continue; + } + + if (IsUrlAttribute(attributeName)) + { + if (!TrySanitizeUrlValue(attribute.Value, out var sanitizedUrl)) continue; - htmlWriter.WriteAttribute(attribute); + htmlWriter.WriteAttributeName(attributeName); + htmlWriter.WriteAttributeValue(sanitizedUrl); + continue; } + htmlWriter.WriteAttribute(attribute); + } + + if (ctx.TagId == HtmlTagId.Body) htmlWriter.WriteAttribute("oncontextmenu", "return false;"); - } - else + } + + private bool TryResolveImageAttribute(string tagName, string attributeName, string value, out string resolvedValue) + { + resolvedValue = null; + + if (string.IsNullOrWhiteSpace(value)) + return false; + + var lowerAttributeName = attributeName.ToLowerInvariant(); + var isImageTag = string.Equals(tagName, "img", StringComparison.OrdinalIgnoreCase); + + if (isImageTag && lowerAttributeName == "srcset") { - if (ctx.TagId == HtmlTagId.Unknown) - { - ctx.DeleteTag = true; - ctx.DeleteEndTag = true; - } - else - { - ctx.WriteTag(htmlWriter, true); - } + resolvedValue = ResolveSrcSet(value); + return resolvedValue != value; } + + if (lowerAttributeName != "src" && lowerAttributeName != "background" && lowerAttributeName != "poster") + return false; + + if (TryGetImage(value, out var image)) + { + resolvedValue = SaveImage(image); + return true; + } + + return false; + } + + private string ResolveSrcSet(string srcSetValue) + { + var candidates = srcSetValue.Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries); + var updatedCandidates = new List(candidates.Length); + + foreach (var candidate in candidates) + { + var parts = candidate.Split(' ', 2, StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries); + + if (parts.Length == 0) + continue; + + var imageSource = parts[0]; + + if (TryGetImage(imageSource, out var image)) + imageSource = SaveImage(image); + + updatedCandidates.Add(parts.Length == 2 ? $"{imageSource} {parts[1]}" : imageSource); + } + + return string.Join(", ", updatedCandidates); + } + + private static bool ShouldDropAttribute(string tagName, string attributeName) + { + if (attributeName.StartsWith("on", StringComparison.OrdinalIgnoreCase)) + return true; + + if (string.Equals(tagName, "body", StringComparison.OrdinalIgnoreCase) + && string.Equals(attributeName, "oncontextmenu", StringComparison.OrdinalIgnoreCase)) + return true; + + if (string.Equals(attributeName, "srcdoc", StringComparison.OrdinalIgnoreCase)) + return true; + + return false; + } + + private static bool IsUrlAttribute(string attributeName) + => string.Equals(attributeName, "href", StringComparison.OrdinalIgnoreCase) + || string.Equals(attributeName, "src", StringComparison.OrdinalIgnoreCase) + || string.Equals(attributeName, "action", StringComparison.OrdinalIgnoreCase) + || string.Equals(attributeName, "xlink:href", StringComparison.OrdinalIgnoreCase) + || string.Equals(attributeName, "background", StringComparison.OrdinalIgnoreCase) + || string.Equals(attributeName, "poster", StringComparison.OrdinalIgnoreCase); + + private static bool TrySanitizeUrlValue(string rawValue, out string sanitizedValue) + { + sanitizedValue = null; + + if (string.IsNullOrWhiteSpace(rawValue)) + return false; + + var value = rawValue.Trim().Trim('"', '\''); + + if (value.StartsWith("javascript:", StringComparison.OrdinalIgnoreCase) + || value.StartsWith("vbscript:", StringComparison.OrdinalIgnoreCase)) + return false; + + if (value.StartsWith("data:", StringComparison.OrdinalIgnoreCase) && !IsAllowedImageDataUrl(value)) + return false; + + sanitizedValue = value; + return true; + } + + private static bool IsAllowedImageDataUrl(string value) + { + const string dataPrefix = "data:"; + + if (!value.StartsWith(dataPrefix, StringComparison.OrdinalIgnoreCase)) + return false; + + var payloadStart = value.IndexOf(',', StringComparison.Ordinal); + + if (payloadStart <= dataPrefix.Length) + return false; + + var metadata = value[dataPrefix.Length..payloadStart]; + var metadataParts = metadata.Split(';', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries); + + if (metadataParts.Length == 0) + return false; + + return AllowedDataImageMimeTypes.Contains(metadataParts[0]); } protected override void VisitTextPart(TextPart entity) @@ -300,12 +431,6 @@ public class HtmlPreviewVisitor : MimeVisitor { bool valid = signature.Verify(); Signatures.Add(signature, valid); - - // If valid is true, then it signifies that the signed content has not - // been modified since this particular signer signed the content. - // - // However, if it is false, then it indicates that the signed content - // has been modified. } catch (DigitalSignatureVerifyException) { diff --git a/Wino.Core.Tests/Models/HtmlPreviewVisitorTests.cs b/Wino.Core.Tests/Models/HtmlPreviewVisitorTests.cs new file mode 100644 index 00000000..7fe2b080 --- /dev/null +++ b/Wino.Core.Tests/Models/HtmlPreviewVisitorTests.cs @@ -0,0 +1,76 @@ +using Xunit; +using FluentAssertions; +using MimeKit; +using Wino.Core.Domain.Models.MailItem; + +namespace Wino.Core.Tests.Models; + +public class HtmlPreviewVisitorTests +{ + [Fact] + public void HtmlPreviewVisitor_Should_Remove_Blocked_Tags_And_Event_Attributes() + { + // Arrange + var html = """ + + +

hello

+ + + + + + + """; + + var message = new MimeMessage(); + message.Body = new TextPart("html") { Text = html }; + + var visitor = new HtmlPreviewVisitor(Path.GetTempPath()); + + // Act + message.Accept(visitor); + var output = visitor.HtmlBody; + + // Assert + output.Should().NotContain(" + + safe + bad + + + + + """; + + var message = new MimeMessage(); + message.Body = new TextPart("html") { Text = html }; + + var visitor = new HtmlPreviewVisitor(Path.GetTempPath()); + + // Act + message.Accept(visitor); + var output = visitor.HtmlBody; + + // Assert + output.Should().Contain("id=\"safe-link\" href=\"https://contoso.com/path\"", "http/https links should be preserved"); + output.Should().Contain("id=\"js-link\"", "the element should remain"); + output.Should().NotContain("href=\"javascript:", "javascript URLs must be removed"); + output.Should().Contain("id=\"allowed\" src=\"data:image/png;base64", "safe image data URLs should be preserved"); + output.Should().NotContain("id=\"svg-script\" src=\"data:text/html", "non-image data URLs should be removed"); + } +}