There are many ways to search a system; what we have to be careful of is making sure that a search cannot cause a DoS (Denial of Service) attack.
So, following on from my article "Denial Of Service (DoS) attacks via SQL Wildcards should be prevented", here is a method that cleans up the search text and removes the wildcard characters, to prevent such an attack.
using System;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Collections.Specialized;

/// <summary>
/// Cleans free-text search input before it is handed to a search back end:
/// optionally strips HTML tags, removes wildcard/pattern characters,
/// lower-cases the words and drops stop words and single-character words.
/// </summary>
public class Search
{
    // Compiled once for the whole process instead of once per Search instance.
    private static readonly Regex RegexStripHtml = new Regex("<[^>]*>", RegexOptions.Compiled);

    // Characters removed from the search text (wildcard and pattern characters).
    private static readonly char[] WildcardCharacters =
    {
        '\\', '|', '(', ')', '[', ']', '*', '?', '}', '{', '^', '+', '%', '_'
    };

    // Tab is included so that tab-separated words are split like any other whitespace.
    private static readonly char[] WordSeparators = { ' ', '\t', '\n', '\r' };

    // Built once per instance; the property is consulted for every word in the input.
    private readonly StringCollection _stopWords = LoadStopWords();

    /// <summary>
    /// Words that are dropped from the cleaned search text. Entries are compared
    /// against lower-cased words, so they should be stored in lower case.
    /// </summary>
    private StringCollection StopWords
    {
        get { return _stopWords; }
    }

    /// <summary>
    /// Cleans up the supplied search text.
    /// </summary>
    /// <param name="content">Raw search text; null or whitespace yields an empty string.</param>
    /// <param name="removeHtml">When true, HTML tags are stripped before cleaning.</param>
    /// <returns>
    /// The remaining words, lower-cased and separated by single spaces, or an
    /// empty string when nothing is left.
    /// </returns>
    public string CleanContent(string content, bool removeHtml)
    {
        // Guard first so a null argument behaves the same whether or not HTML
        // stripping was requested (it used to throw when removeHtml was false).
        if (StringIsNullOrWhitespace(content))
        {
            return string.Empty;
        }

        if (removeHtml)
        {
            content = StripHtml(content);
        }

        content = RemoveWildcards(content);

        var words = content
            .Split(WordSeparators, StringSplitOptions.RemoveEmptyEntries)
            .Select(t => t.ToLowerInvariant().Trim())
            .Where(w => w.Length > 1 && !StopWords.Contains(w))
            .ToArray();

        return string.Join(" ", words);
    }

    /// <summary>
    /// Creates the stop word list.
    /// </summary>
    private static StringCollection LoadStopWords()
    {
        var stopWords = new StringCollection();
        // Add your stopword here, or get them from a data source
        return stopWords;
    }

    /// <summary>
    /// Returns <paramref name="value"/> with every wildcard character removed.
    /// </summary>
    private static string RemoveWildcards(string value)
    {
        var cleaned = new StringBuilder(value.Length);
        foreach (var c in value)
        {
            if (Array.IndexOf(WildcardCharacters, c) < 0)
            {
                cleaned.Append(c);
            }
        }

        return cleaned.ToString();
    }

    /// <summary>
    /// Removes anything that looks like an HTML tag and trims the result.
    /// </summary>
    private string StripHtml(string html)
    {
        return StringIsNullOrWhitespace(html)
            ? string.Empty
            : RegexStripHtml.Replace(html, string.Empty).Trim();
    }

    /// <summary>
    /// True when <paramref name="value"/> is null, empty or whitespace only.
    /// </summary>
    private bool StringIsNullOrWhitespace(string value)
    {
        return (value == null) || (value.Trim().Length == 0);
    }
}
Some of this code was cribbed from BlogEngine.
Here are some unit tests for the CleanContent method. Yes, unit tests: this is the perfect kind of method to unit test, and the tests help in understanding how the method works.
using Microsoft.VisualStudio.TestTools.UnitTesting;
using Capita.Dolphin.Web.Helpers;

/// <summary>
/// Unit tests for <c>Search.CleanContent</c>. Every case feeds in a variation
/// of "Hello moon" and expects the same cleaned, lower-cased phrase back.
/// </summary>
[TestClass]
public class HelperSearchTests
{
    // The cleaned phrase every test in this class expects.
    private const string CleanedPhrase = "hello moon";

    /// <summary>Plain words are only lower-cased.</summary>
    [TestMethod]
    public void CleanContentValidCharacters()
    {
        // Arrange
        var cleaner = new Search();

        // Act
        var cleaned = cleaner.CleanContent("Hello moon", false);

        // Assert
        Assert.AreEqual(CleanedPhrase, cleaned);
    }

    /// <summary>Wildcard characters embedded in a word are removed.</summary>
    [TestMethod]
    public void CleanContentInValidCharacters()
    {
        // Arrange
        var cleaner = new Search();

        // Act
        var cleaned = cleaner.CleanContent("Hello moo%*([|+^}{)n", false);

        // Assert
        Assert.AreEqual(CleanedPhrase, cleaned);
    }

    /// <summary>HTML tags are stripped when HTML removal is requested.</summary>
    [TestMethod]
    public void CleanContentRemoveHTMLCharacters()
    {
        // Arrange
        var cleaner = new Search();

        // Act
        var cleaned = cleaner.CleanContent("<p>Hello moon</p>", true);

        // Assert
        Assert.AreEqual(CleanedPhrase, cleaned);
    }

    /// <summary>Leading wildcard characters and a trailing tag are both removed.</summary>
    [TestMethod]
    public void CleanContentRemoveExtraWildCardsCharacters()
    {
        // Arrange
        var cleaner = new Search();

        // Act
        var cleaned = cleaner.CleanContent("[][^]_%Hello moon</p>", true);

        // Assert
        Assert.AreEqual(CleanedPhrase, cleaned);
    }
}