I have been extending Abot, an open-source web crawler written in .NET: https://github.com/sjdirect/abot
This has made it very easy to run smoke tests such as looking for Internal Server Errors (5XX) as part of informal testing before a release.
There is also a commercial version, AbotX (https://abotx.org/), built on top of Abot.
A console app with a custom crawler_ProcessPageCrawlCompleted method is enough to get started (I just used the provided sample code for the other methods):
/// <summary>
/// Entry point: attaches the crawl event handlers to a <c>PoliteWebCrawler</c>,
/// crawls the site whose root URL is passed as the first command-line argument,
/// prints the outcome, and then waits for a key press before exiting.
/// </summary>
/// <param name="args">Command-line arguments; <c>args[0]</c> must be the absolute root URL to crawl.</param>
static void Main(string[] args)
{
    // Validate the target up front so a missing or malformed URL yields a usage
    // message rather than an exception from the Uri constructor.
    Uri rootUri;
    if (args.Length == 0 || !Uri.TryCreate(args[0], UriKind.Absolute, out rootUri))
    {
        Console.Error.WriteLine("Usage: pass the absolute root URL to crawl as the first argument.");
        return;
    }

    //Will use app.config for configuration
    PoliteWebCrawler crawler = new PoliteWebCrawler();
    crawler.PageCrawlStartingAsync += crawler_ProcessPageCrawlStarting;
    crawler.PageCrawlCompletedAsync += crawler_ProcessPageCrawlCompleted;
    crawler.PageCrawlDisallowedAsync += crawler_PageCrawlDisallowed;
    crawler.PageLinksCrawlDisallowedAsync += crawler_PageLinksCrawlDisallowed;

    // Subscribing alone does nothing: the handlers above only fire once a crawl
    // is actually started. Per the Abot 1.x README, Crawl is synchronous and
    // returns when the crawl has finished.
    var result = crawler.Crawl(rootUri);
    if (result.ErrorOccurred)
    {
        Console.WriteLine("Crawl of {0} completed with error: {1}", result.RootUri.AbsoluteUri, result.ErrorException.Message);
    }
    else
    {
        Console.WriteLine("Crawl of {0} completed without error.", result.RootUri.AbsoluteUri);
    }

    // Keep the console window open so the output can be read.
    Console.ReadKey();
}
/// <summary>
/// Handler for the crawler's page-crawl-completed event. Classifies the crawled
/// page as a transport-level failure, a non-200 (or missing) response, or a success,
/// so that each failure kind can be reported separately.
/// </summary>
/// <param name="sender">The object that raised the event (unused).</param>
/// <param name="e">Event data carrying the <c>CrawledPage</c> that was just processed.</param>
static void crawler_ProcessPageCrawlCompleted(object sender, PageCrawlCompletedArgs e)
{
    CrawledPage crawledPage = e.CrawledPage;
    if (crawledPage.WebException != null)
    {
        // The request itself failed (a WebException was recorded for this page).
        // do something
    }
    else if (crawledPage.HttpWebResponse == null
        || crawledPage.HttpWebResponse.StatusCode != HttpStatusCode.OK)
    {
        // Either no response object was recorded, or the server answered with a
        // status other than 200 OK. The null guard prevents a NullReferenceException
        // inside the handler when there is no response to inspect.
        // NOTE(review): presumably a null response without a WebException can occur
        // when the request fails with a non-web exception - confirm against Abot.
        // do something
    }
}