Harden junk mail image tracking protection

This commit is contained in:
Burak Kaan Köse
2026-04-21 22:21:59 +02:00
parent e0f517e993
commit c0023614ad
5 changed files with 155 additions and 8 deletions
@@ -15,7 +15,7 @@ public class HtmlPreviewVisitor : MimeVisitor
{
private static readonly HashSet<string> BlockedTags = new(StringComparer.OrdinalIgnoreCase)
{
"script", "iframe", "frame", "frameset", "object", "embed", "applet", "base", "meta", "form"
"script", "iframe", "frame", "frameset", "object", "embed", "applet", "base", "meta", "form", "link"
};
private static readonly HashSet<string> AllowedDataImageMimeTypes = new(StringComparer.OrdinalIgnoreCase)
@@ -15,6 +15,7 @@ public class HtmlPreviewVisitorTests
<html>
<body onload="alert('x')">
<h1 onclick="evil()">hello</h1>
<link rel="stylesheet" href="https://tracker.example/mail.css" />
<script>alert('xss')</script>
<iframe src="https://malicious.example"></iframe>
<object data="https://malicious.example/file.swf"></object>
@@ -34,6 +35,7 @@ public class HtmlPreviewVisitorTests
// Assert
output.Should().NotContain("<script", "script tags must be blocked in rendered html");
output.Should().NotContain("<link", "external stylesheet tags must be blocked in rendered html");
output.Should().NotContain("<iframe", "iframe tags must be blocked in rendered html");
output.Should().NotContain("<object", "object tags must be blocked in rendered html");
output.Should().NotContain("onload=", "event handler attributes must be stripped");
@@ -0,0 +1,53 @@
using FluentAssertions;
using HtmlAgilityPack;
using Wino.Services.Extensions;
using Xunit;
namespace Wino.Core.Tests.Services;
/// <summary>
/// Verifies that <c>ClearImages</c> strips remote image-loading hooks from a mail body
/// while leaving embedded images and unrelated styling untouched.
/// </summary>
public class HtmlAgilityPackExtensionsTests
{
    [Fact]
    public void ClearImages_Should_Block_Remote_Image_References_But_Keep_Embedded_Ones()
    {
        // Arrange: one fixture that exercises every hook the sanitizer is expected to handle.
        const string mailHtml = """
            <html>
            <head>
            <style>
            .hero { background-image: url('https://tracker.example/bg.png'); color: red; }
            </style>
            </head>
            <body background="https://tracker.example/body.png">
            <img id="remote" src="https://tracker.example/pixel.png" />
            <img id="embedded" src="data:image/png;base64,AAAA" />
            <img id="responsive" srcset="https://tracker.example/1x.png 1x, data:image/png;base64,BBBB 2x" />
            <div id="inline-style" style="background-image:url('https://tracker.example/inline.png');color:blue;">hello</div>
            <v:fill id="vml" src="https://tracker.example/vml.png"></v:fill>
            <svg>
            <image id="svg-remote" href="https://tracker.example/vector.svg"></image>
            <use id="svg-local" href="#icon"></use>
            </svg>
            </body>
            </html>
            """;

        var mail = new HtmlDocument();
        mail.LoadHtml(mailHtml);

        // Act
        mail.ClearImages();
        var rendered = mail.DocumentNode.OuterHtml;

        // Assert: <img> and legacy attribute hooks.
        rendered.Should().Contain("id=\"embedded\" src=\"data:image/png;base64,AAAA\"", "embedded inline images should still render");
        rendered.Should().NotContain("id=\"remote\" src=", "remote img sources should be removed");
        rendered.Should().NotContain("background=\"https://tracker.example/body.png\"", "background attributes can be used as trackers");
        rendered.Should().NotContain("srcset=", "responsive image candidates should be removed because they may fetch remote trackers");

        // Assert: inline style attributes and <style> blocks.
        rendered.Should().NotContain("https://tracker.example/inline.png", "inline CSS should not be allowed to fetch remote images");
        rendered.Should().Contain("color:blue", "non-image inline styling should be preserved");
        rendered.Should().NotContain("https://tracker.example/bg.png", "style blocks should not be allowed to fetch remote images");
        rendered.Should().Contain("color: red", "safe CSS declarations should remain");

        // Assert: VML and SVG references.
        rendered.Should().NotContain("id=\"vml\" src=", "VML image references should be removed");
        rendered.Should().NotContain("id=\"svg-remote\" href=", "SVG image references should not fetch remote content");
        rendered.Should().Contain("id=\"svg-local\" href=\"#icon\"", "local fragment references should remain");
    }
}
@@ -499,7 +499,7 @@ public partial class MailRenderingPageViewModel : MailBaseViewModel,
// Use the received date from MailCopy if available, otherwise fall back to the sent date from MIME message
CreationDate = initializedMailItemViewModel?.MailCopy.CreationDate ?? message.Date.DateTime;
// Automatically disable images for Junk folder to prevent pixel tracking.
// Automatically block remote image loading for Junk folder to reduce pixel tracking.
// This can only work for selected mail item rendering, not for EML file rendering.
if (initializedMailItemViewModel != null &&
initializedMailItemViewModel.MailCopy.AssignedFolder.SpecialFolderType == SpecialFolderType.Junk)
@@ -1,6 +1,7 @@
using System;
using System;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using HtmlAgilityPack;
namespace Wino.Services.Extensions;
@@ -8,16 +9,64 @@ namespace Wino.Services.Extensions;
public static class HtmlAgilityPackExtensions
{
/// <summary>
/// Clears passive remote image-loading hooks while preserving already-embedded inline images.
/// For every descendant node this clears non-embedded <c>src</c>, <c>background</c>, <c>poster</c>
/// and <c>data</c> attributes, drops <c>srcset</c> and <c>imagesrcset</c>, sanitizes the inline
/// <c>style</c> attribute, and clears <c>href</c>/<c>xlink:href</c> on SVG image-reference elements.
/// Afterwards the contents of every <c>style</c> element are sanitized as well.
/// </summary>
/// <param name="document">The parsed HTML document to sanitize in place.</param>
public static void ClearImages(this HtmlDocument document)
{
// NOTE(review): the Contains("<img") check directly below and the SelectNodes("//img") loop header
// a few lines further down look like pre-change lines carried over by the diff view — confirm
// against the committed file before relying on this listing.
if (document.DocumentNode.InnerHtml.Contains("<img"))
if (document?.DocumentNode == null)
{
foreach (var eachNode in document.DocumentNode.SelectNodes("//img"))
return;
}
// Iterate over a ToList() snapshot of all descendants rather than the live enumeration.
foreach (var eachNode in document.DocumentNode.Descendants().ToList())
{
// Each of these attributes is removed unless its value is an embedded source.
ClearRemoteImageAttribute(eachNode, "src");
ClearRemoteImageAttribute(eachNode, "background");
ClearRemoteImageAttribute(eachNode, "poster");
ClearRemoteImageAttribute(eachNode, "data");
// When srcset is present, both src and srcset are removed unconditionally.
if (eachNode.Attributes.Contains("srcset"))
{
eachNode.Attributes.Remove("src");
eachNode.Attributes.Remove("srcset");
}
if (eachNode.Attributes.Contains("imagesrcset"))
{
eachNode.Attributes.Remove("imagesrcset");
}
// Inline styles are run through SanitizeCss; the attribute is dropped if nothing is left.
if (eachNode.Attributes.Contains("style"))
{
var sanitizedStyle = SanitizeCss(eachNode.GetAttributeValue("style", string.Empty));
if (string.IsNullOrWhiteSpace(sanitizedStyle))
{
eachNode.Attributes.Remove("style");
}
else
{
eachNode.SetAttributeValue("style", sanitizedStyle);
}
}
// href/xlink:href are only touched on the SVG elements named in IsSvgImageReferenceElement.
if (IsSvgImageReferenceElement(eachNode))
{
ClearRemoteImageAttribute(eachNode, "href");
ClearRemoteImageAttribute(eachNode, "xlink:href");
}
}
// Second pass: sanitize the CSS inside <style> elements, removing elements that end up blank.
foreach (var styleNode in document.DocumentNode.Descendants("style").ToList())
{
var sanitizedCss = SanitizeCss(styleNode.InnerHtml);
if (string.IsNullOrWhiteSpace(sanitizedCss))
{
styleNode.Remove();
}
else
{
styleNode.InnerHtml = sanitizedCss;
}
}
}
@@ -116,4 +165,47 @@ public static class HtmlAgilityPackExtensions
break;
}
}
/// <summary>
/// Removes <paramref name="attributeName"/> from <paramref name="node"/> when it carries a
/// non-blank value that is not recognized as an embedded image source.
/// </summary>
/// <param name="node">The element whose attribute is inspected.</param>
/// <param name="attributeName">The attribute to inspect, for example <c>src</c>.</param>
private static void ClearRemoteImageAttribute(HtmlNode node, string attributeName)
{
    var rawValue = node.GetAttributeValue(attributeName, null);

    // Missing or blank attributes are left alone; only non-embedded values are dropped.
    var pointsAtRemoteContent = !string.IsNullOrWhiteSpace(rawValue) && !IsEmbeddedImageSource(rawValue);
    if (pointsAtRemoteContent)
    {
        node.Attributes.Remove(attributeName);
    }
}
/// <summary>
/// Tells whether an attribute value refers to content that is already embedded:
/// a <c>data:image/</c> URI, a <c>cid:</c> reference, or a <c>#</c> fragment.
/// Surrounding whitespace and quote characters are ignored; scheme matching is case-insensitive.
/// </summary>
/// <param name="value">The raw attribute value; must not be null.</param>
/// <returns><c>true</c> when the value starts with one of the embedded prefixes.</returns>
private static bool IsEmbeddedImageSource(string value)
{
    var candidate = value.Trim().Trim('"', '\'');

    if (candidate.StartsWith('#'))
    {
        return true;
    }

    foreach (var embeddedPrefix in new[] { "data:image/", "cid:" })
    {
        if (candidate.StartsWith(embeddedPrefix, StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Tells whether the node is one of the SVG elements checked here for image references:
/// <c>image</c>, <c>feImage</c> or <c>use</c> (compared case-insensitively).
/// </summary>
/// <param name="node">The element to classify.</param>
/// <returns><c>true</c> when the element name matches one of the three names.</returns>
private static bool IsSvgImageReferenceElement(HtmlNode node)
{
    var elementName = node.Name;

    foreach (var svgElementName in new[] { "image", "feImage", "use" })
    {
        if (elementName.Equals(svgElementName, StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Neutralizes CSS constructs that can trigger a network fetch: every <c>url(...)</c> and
/// <c>image-set(...)</c> is replaced with <c>none</c>, and <c>@import</c> rules are removed.
/// All other declarations are left as they are.
/// </summary>
/// <param name="css">Raw CSS taken from a style attribute or a style element; may be null or blank.</param>
/// <returns>The sanitized, trimmed CSS, or an empty string when the input is null or whitespace.</returns>
private static string SanitizeCss(string css)
{
    if (string.IsNullOrWhiteSpace(css))
    {
        return string.Empty;
    }

    // CultureInvariant keeps case-insensitive matching independent of the current culture, so
    // "IMAGE-SET" / "@IMPORT" are still caught when the process runs under e.g. a Turkish locale.
    const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.CultureInvariant;

    // The closing parenthesis is optional: an unterminated "url(" at the end of the input is
    // still accepted as a URL by CSS parsers, so it must be neutralized as well.
    var sanitizedCss = Regex.Replace(css, @"url\s*\([^)]*\)?", "none", options);
    sanitizedCss = Regex.Replace(sanitizedCss, @"image-set\s*\([^)]*\)?", "none", options);

    // Whitespace after the keyword is optional so the minified form @import"sheet.css"; is removed too.
    sanitizedCss = Regex.Replace(sanitizedCss, @"@import\s*[^;]+;?", string.Empty, options);

    // NOTE(review): CSS escape sequences inside identifiers (for example a hex-escaped letter in
    // "url") are not decoded before matching — confirm whether the target renderer honors them.
    return sanitizedCss.Trim();
}
}