X.Web.Sitemap


X.Web.Sitemap is a library for generating and managing sitemaps in .NET applications.
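
The package can be installed from NuGet (the package id is assumed here to match the library name):

    dotnet add package X.Web.Sitemap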

Usage examples

Below is an example of basic usage in a simple, non-testable style:

    using System;
    using X.Web.Sitemap;

    class Program
    {
        static void Main(string[] args)
        {
            var sitemap = new Sitemap();

            sitemap.Add(new Url
            {
                ChangeFrequency = ChangeFrequency.Hourly,
                Location = "http://www.example.com",
                Priority = 0.8,
                TimeStamp = DateTime.Now
            });

            sitemap.Add(CreateUrl("http://www.example.com/link1"));
            sitemap.Add(CreateUrl("http://www.example.com/link2"));
            sitemap.Add(CreateUrl("http://www.example.com/link3"));
            sitemap.Add(CreateUrl("http://www.example.com/link4"));
            sitemap.Add(CreateUrl("http://www.example.com/link5"));

            //Save the sitemap structure to a file
            sitemap.Save(@"d:\www\example.com\sitemap.xml");

            //Split a large URL list into several sitemap files and store them in a directory
            sitemap.SaveToDirectory(@"d:\www\example.com\sitemaps");

            //Get the XML content of the sitemap
            Console.Write(sitemap.ToXml());

            Console.ReadKey();
        }

        private static Url CreateUrl(string url)
        {
            return new Url
            {
                ChangeFrequency = ChangeFrequency.Daily,
                Location = url,
                Priority = 0.5,
                TimeStamp = DateTime.Now
            };
        }
    }
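
For reference, the file written by Save (and the string returned by ToXml()) follows the sitemaps.org protocol, so the output for the first Url above should look roughly like this (the timestamp value and exact date formatting are illustrative):

    <?xml version="1.0" encoding="utf-8"?>
    <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
      <url>
        <loc>http://www.example.com</loc>
        <lastmod>2024-01-01T12:00:00</lastmod>
        <changefreq>hourly</changefreq>
        <priority>0.8</priority>
      </url>
      <!-- one <url> element per added Url follows -->
    </urlset>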

Below is a more comprehensive example that demonstrates how to create many sitemaps and how to add them to a sitemap index file in a unit-testable fashion.

    public class SitemapGenerationWithSitemapIndexExample
    {
        private readonly ISitemapGenerator _sitemapGenerator;
        private readonly ISitemapIndexGenerator _sitemapIndexGenerator;

        //--this is a bogus interface defined in this example to simulate something you might use to get a list of URLs from your CMS
        private readonly IWebsiteUrlRetriever _websiteUrlRetriever;

        //--an IoC/dependency injection framework should inject these in
        public SitemapGenerationWithSitemapIndexExample(
            ISitemapGenerator sitemapGenerator, 
            ISitemapIndexGenerator sitemapIndexGenerator, 
            IWebsiteUrlRetriever websiteUrlRetriever)
        {
            _sitemapGenerator = sitemapGenerator;
            _sitemapIndexGenerator = sitemapIndexGenerator;
            _websiteUrlRetriever = websiteUrlRetriever;
        }

        //--this is an example showing how you might take a large list of URLs for different kinds of resources and build both a set of sitemaps (depending on
        //  how many URLs you have) and a sitemap index file to go with them
        public void GenerateSitemapsForMyEntireWebsite()
        {
            //--imagine you have an interface that can return a list of URLs for a resource that you consider to be high priority -- for example, the product detail pages (PDPs)
            //  of your website
            var productPageUrlStrings = _websiteUrlRetriever.GetHighPriorityProductPageUrls();

            //--build a list of X.Web.Sitemap.Url objects and determine the appropriate ChangeFrequency, TimeStamp (aka "LastMod", the date the resource last changed),
            //  and a priority for each page. If you can build in some logic to prioritize your pages then you are more sophisticated than most! :)
            var allUrls = productPageUrlStrings.Select(url => new Url
            {
                //--assign the location of the HTTP request -- e.g.: https://www.somesite.com/some-resource
                Location = url,
                //--let's instruct crawlers to crawl these pages monthly since the content doesn't change that much
                ChangeFrequency = ChangeFrequency.Monthly,
                //--in this case we don't know when the page was last modified, so we wouldn't really set this; it's only assigned here to demonstrate that the property exists.
                //  if your system is smart enough to know when a page was last modified, then that is the best-case scenario
                TimeStamp = DateTime.UtcNow,
                //--set this to a value between 0 and 1. It should only be used as a ranking relative to the other pages of your site, so that search engines know which result
                //  to prioritize in SERPs if multiple pages from your site look pertinent. Since product pages are really important to us, we'll make them a .9
                Priority = .9
            }).ToList();

            var miscellaneousLowPriorityUrlStrings = _websiteUrlRetriever.GetMiscellaneousLowPriorityUrls();
            var miscellaneousLowPriorityUrls = miscellaneousLowPriorityUrlStrings.Select(url => new Url
            {
                Location = url,
                //--let's instruct crawlers to crawl these pages yearly since the content almost never changes
                ChangeFrequency = ChangeFrequency.Yearly,
                //--let's pretend this content was changed a year ago
                TimeStamp = DateTime.UtcNow.AddYears(-1),
                //--these pages are super low priority
                Priority = .1
            }).ToList();

            //--combine the URLs into one big list. These could of course be kept separate, and two different sitemap index files could be generated if we wanted
            allUrls.AddRange(miscellaneousLowPriorityUrls);

            //--pick a place to write the sitemap files; any existing sitemap files in that folder will be overwritten by new ones
            var targetSitemapDirectory = new DirectoryInfo(@"\\SomeServer\some_awesome_file_Share\sitemaps\");

            //--generate one or more sitemaps (depending on the number of URLs) in the designated location.
            var fileInfoForGeneratedSitemaps = _sitemapGenerator.GenerateSitemaps(allUrls, targetSitemapDirectory);

            var sitemapInfos = new List<SitemapInfo>();
            var dateSitemapWasUpdated = DateTime.UtcNow.Date;
            
            foreach (var fileInfo in fileInfoForGeneratedSitemaps)
            {
                //--it's up to you to figure out the URI of the sitemap you wrote to the file system. In this case we are assuming that the directory above
                //  has files exposed via the /sitemaps/ subfolder of www.mywebsite.com
                var uriToSitemap = new Uri($"https://www.mywebsite.com/sitemaps/{fileInfo.Name}");
               
                sitemapInfos.Add(new SitemapInfo(uriToSitemap, dateSitemapWasUpdated));
            }

            //--now generate the sitemap index file which has a reference to all of the sitemaps that were generated. 
            _sitemapIndexGenerator.GenerateSitemapIndex(sitemapInfos, targetSitemapDirectory, "sitemap-index.xml");

            //-- After this runs you'll want to make sure your robots.txt has a reference to the sitemap index (at the bottom of robots.txt) like this: 
            //  "Sitemap: https://www.mywebsite.com/sitemaps/sitemap-index.xml"
            //  You could do this manually (since this may never change) or if you are ultra-fancy, you could dynamically update your robots.txt with the names of the sitemap index
            //  file(s) you generated

        }


        //--some bogus interface that is meant to simulate pulling URLs from your CMS/website
        public interface IWebsiteUrlRetriever
        {
            List<string> GetHighPriorityProductPageUrls();
            List<string> GetMiscellaneousLowPriorityUrls();
        }
    }
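
As the final comment in the example notes, crawlers find the sitemap index through robots.txt. Below is a minimal sketch of the "dynamically update robots.txt" approach, using only standard file I/O; the path and URL arguments are illustrative and not part of the library:

    using System;
    using System.IO;
    using System.Linq;

    public static class RobotsTxtUpdater
    {
        //--appends a "Sitemap:" directive to robots.txt unless one is already present
        public static void EnsureSitemapReference(string robotsTxtPath, string sitemapIndexUrl)
        {
            var line = $"Sitemap: {sitemapIndexUrl}";
            var existingLines = File.Exists(robotsTxtPath)
                ? File.ReadAllLines(robotsTxtPath)
                : Array.Empty<string>();

            //--only write if the reference isn't there yet
            if (!existingLines.Contains(line, StringComparer.OrdinalIgnoreCase))
            {
                File.AppendAllText(robotsTxtPath, Environment.NewLine + line + Environment.NewLine);
            }
        }
    }

For example: RobotsTxtUpdater.EnsureSitemapReference(@"d:\www\example.com\robots.txt", "https://www.mywebsite.com/sitemaps/sitemap-index.xml");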
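
Finally, the constructor injection shown at the top of the example needs a container registration. Here is a minimal sketch using Microsoft.Extensions.DependencyInjection, assuming the library's default implementations are named SitemapGenerator and SitemapIndexGenerator (verify the type names against the package), that IWebsiteUrlRetriever has been moved out of the example class, and that CmsUrlRetriever is a hypothetical class implementing it:

    using Microsoft.Extensions.DependencyInjection;
    using X.Web.Sitemap;

    var services = new ServiceCollection();

    //--library interfaces mapped to their (assumed) concrete implementations
    services.AddSingleton<ISitemapGenerator, SitemapGenerator>();
    services.AddSingleton<ISitemapIndexGenerator, SitemapIndexGenerator>();

    //--your own URL source; CmsUrlRetriever is hypothetical
    services.AddSingleton<IWebsiteUrlRetriever, CmsUrlRetriever>();

    services.AddTransient<SitemapGenerationWithSitemapIndexExample>();

    var provider = services.BuildServiceProvider();
    provider.GetRequiredService<SitemapGenerationWithSitemapIndexExample>()
        .GenerateSitemapsForMyEntireWebsite();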