Click here to Skip to main content
65,938 articles
CodeProject is changing. Read more.
Articles
(untagged)

Using MbUnit StaticTestFactory to Validate Sitemap.xml Links

0.00/5 (No votes)
12 May 2010 1  
How to use MbUnit StaticTestFactory to validate Sitemap.xml links

I’ve been investigating a replacement for our current link checker (SEO Optimization Toolkit) to be run automatically as part of our build so that we can get a quick heads up if one of our dynamic pages breaks. The problem is that as most of our sites are built with Ektron, there’s a lot of potential for a combination of content and code to break individual pages that share a template with working pages.

As these pages are data driven, hard-coding tests is very time-consuming and the resulting tests are very fragile. Fortunately, most of our Ektron sites have an automatically generated sitemap.xml, which gives us a neat list of URLs to test. This is where MbUnit’s StaticTestFactory comes in useful, as it allows us to dynamically create a distinct test for each URL.

Enjoy!

  1: using System;
  2: using System.Collections.Generic;
  3: using System.Globalization;
  4: using System.Linq;
  5: using System.Net;
  6: using System.Xml.Linq;
  7: using MbUnit.Framework;
  8:
  9: namespace MartinOnDotNet.VerificationTests
 10: {
 11:     /// <summary>
 12:     /// Includes methods to verify the validity of a sitemap.xml
 13:     /// </summary>
 14:     public sealed class ValidateSiteMap
 15:     {
 16:
 17:         /// <summary>
 18:         /// Generates a static test for each url referenced within the sitemap
 19:         /// </summary>
 20:         [StaticTestFactory, Parallelizable(TestScope.Descendants)]
 21:         public static IEnumerable<Test> GenerateSiteMapLinkTests()
 22:         {
                 // Uri for Xml Sitemap to test : http://localhost/sitemap.xml
 23:             Uri sitemapUri = new Uri(Properties.Settings.Default.SiteMapXmlUri);
                 //timeout for each request in ms : 300ms
 24:             int requestTimeout = Properties.Settings.Default.SiteMapRequestTimeout;
 25:
 26:             IEnumerable<string> locations = GetSitemapLocations(sitemapUri);
 27:             //is sitemap populated
 28:             yield return CreateSitemapHasNodesTest(sitemapUri, locations);
 29:
 30:             //are all reference urls valid
 31:             foreach (string location in locations)
 32:             {
 33:                 yield return CreateLocationTest(requestTimeout, location,
                         HttpStatusCode.OK);
 34:             }
 35:
 36:             // check that robots.txt is present
 37:             Uri robotstxtUri = new Uri(sitemapUri, "/robots.txt");
 38:             yield return CreateLocationTest(requestTimeout, robotstxtUri.ToString(),
                     HttpStatusCode.OK);
 39:             //finally, let's check that a deliberately incorrect url
 40:             Uri nonExistantUri = new Uri(sitemapUri, "/nonexistantfileonserver/");
 41:             yield return CreateLocationTest(requestTimeout,
                     nonExistantUri.ToString(), HttpStatusCode.NotFound);
 42:
 43:         }
 44:
 45:         /// <summary>
 47:         /// </summary>
 48:         /// <param name="sitemapUri">The sitemap URI.</param>
 49:         /// <param name="locations">The locations.</param>
 50:         /// <returns>A test that checks the sitemap has nodes</returns>
 51:         private static TestCase CreateSitemapHasNodesTest(Uri sitemapUri,
                 IEnumerable<string> locations)
 52:         {
 53:             return new TestCase(string.Format(CultureInfo.InvariantCulture,
                     "{0} - Sitemap Has Entries", sitemapUri), () =>
 54:             {
 55:                 Assert.IsTrue(locations.Any());
 56:             });
 57:         }
 58:
 59:         /// <summary>
 60:         /// Creates the location test.
 61:         /// </summary>
 62:         /// <param name="requestTimeout">The request timeout.</param>
 63:         /// <param name="location">The location.</param>
 64:         /// <returns>A unique test for a sitemap location</returns>
 65:         private static TestCase CreateLocationTest(int requestTimeout,
                 string location, HttpStatusCode expectedResult)
 66:         {
 67:             return new TestCase(location, () =>
 68:             {
 69:                 HttpWebRequest wrq =
                         HttpWebRequest.Create(location) as HttpWebRequest;
                     // appear to be google to escape any custom error handling
 70:                 wrq.UserAgent = "Googlebot/2.1 (+http://www.google.com/bot.html)";
 71:                 wrq.Timeout = requestTimeout;
 72:                 HttpWebResponse wrp = null;
 73:                 try
 74:                 {
 75:                     wrp = GetResponse(wrq);
 76:                     Assert.AreEqual<System.Net.HttpStatusCode>(expectedResult,
                             wrp.StatusCode);
 77:                 }
 78:                 finally
 79:                 {
 80:                     if (wrp != null) wrp.Close();
 81:                 }
 82:             });
 83:         }
 84:
 85:         #region Helper Methods
 86:
 87:         /// <summary>
 88:         /// Gets the sitemap locations.
 89:         /// </summary>
 90:         /// <param name="sitemapUri">The sitemap URI.</param>
 91:         /// <returns>A list of locations referenced within the sitemap</returns>
 92:         private static IEnumerable<string> GetSitemapLocations(Uri sitemapUri)
 93:         {
 94:             XNamespace xn = XNamespace.Get(
                     @"http://www.sitemaps.org/schemas/sitemap/0.9");
 95:             XDocument xdoc = XDocument.Load(sitemapUri.ToString(),
                     LoadOptions.PreserveWhitespace);
 96:             return from loc in xdoc.Descendants(xn + "loc")
 97:                             select loc.Value;
 98:         }
 99:
100:         /// <summary>
101:         /// Gets the response object and handles any protocol exceptions
102:         /// </summary>
103:         /// <param name="request">The request.</param>
104:         /// <returns>The response object if available</returns>
105:         private static HttpWebResponse GetResponse(HttpWebRequest request)
106:         {
107:             try
108:             {
109:                 return request.GetResponse() as HttpWebResponse;
110:             }
111:             catch (WebException wex)
112:             {
113:                 if (wex.Status == WebExceptionStatus.ProtocolError)
114:                 {
115:                     return wex.Response as HttpWebResponse;
116:                 }
117:                 else
118:                 {
119:                     throw;
120:                 }
121:             }
122:         }
123:
124:         #endregion
125:
126:     }
127: }

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here