Although Scoble has already received a lot of answers on how he can get what he wants out of his Excel file containing all the URLs from the http://weblogs.com/changes.xml file, I couldn't help but fire up VS and solve his problem using Linq to XML. I'm sure Scoble will send me a big fat check for solving his problem with one of the cooler technologies that will be coming out of his former employer. Of course, I didn't give him just the URLs that he asked for either, so maybe I shouldn't hold my breath. On the other hand, maybe he'll give me a bonus for giving him a nice list of the URLs grouped by each hosting site he's interested in, along with a count of the sites for each?
Output:
- Blogspot has 8928 sites in the changes.xml file
- Spaces has 900 sites in the changes.xml file
- Wordpress has 384 sites in the changes.xml file
- TypePad has 118 sites in the changes.xml file
Code:
using System;
using System.Collections.Generic;
using System.Text;
using System.Query;
using System.Xml.XLinq;
using System.Data.DLinq;

namespace ScobleWeblogsDotComCleaner {
    /// <summary>
    /// Loads the weblogs.com changes.xml feed and prints, for each hosting site
    /// of interest, how many weblogs it has in the feed followed by their URLs.
    /// Hosting sites are listed from the most to the fewest matching weblogs.
    /// </summary>
    class Program {
        // Single source of truth for the hosting sites of interest. Each entry is
        // { domain fragment searched for in the URL, display name }. Entries are
        // tested in order and the first hit wins, so both IsMatch and GetSite
        // always agree on which URLs belong to which site.
        private static readonly string[][] HostingSites = {
            new string[] { "spaces.live.com", "Spaces" },
            new string[] { "typepad.com", "TypePad" },
            new string[] { "blogspot.com", "Blogspot" },
            new string[] { "wordpress.com", "Wordpress" }
        };

        /// <summary>
        /// Entry point: downloads the feed, groups the matching weblog elements
        /// by hosting site and writes the report to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args) {
            XElement weblogs = XElement.Load("http://rpc.weblogs.com/changes.xml");

            // Keep only weblogs hosted on a known site, bucket them per site,
            // and put the biggest bucket first.
            var matches =
                from weblog in weblogs.Elements("weblog")
                where IsMatch((string) weblog.Attribute("url"))
                group weblog by GetSite((string) weblog.Attribute("url")) into sites
                orderby sites.Count() descending
                select new {
                    HostingSite = sites.Key,
                    Count = sites.Count(),
                    Sites = sites
                };

            foreach(var match in matches) {
                Console.WriteLine(match.HostingSite + " has " + match.Count + " sites in the changes.xml file");
                foreach(var x in match.Sites) {
                    Console.WriteLine(" - " + (string) x.Attribute("url"));
                }
            }
        }

        /// <summary>
        /// Maps a weblog URL to the display name of its hosting site.
        /// </summary>
        /// <param name="url">
        /// The weblog URL. May be null or empty, e.g. when the element has no
        /// url attribute and the string conversion yields null.
        /// </param>
        /// <returns>
        /// The display name of the first hosting site whose domain fragment occurs
        /// anywhere in <paramref name="url"/>, or <see cref="String.Empty"/> when
        /// the URL is null, empty, or matches none of the sites.
        /// </returns>
        private static string GetSite(string url) {
            // A weblog element without a url attribute must not crash the whole run.
            if(String.IsNullOrEmpty(url)) {
                return String.Empty;
            }

            foreach(string[] site in HostingSites) {
                // Host names are case-insensitive, so compare ordinally ignoring case.
                if(url.IndexOf(site[0], StringComparison.OrdinalIgnoreCase) >= 0) {
                    return site[1];
                }
            }

            return String.Empty;
        }

        /// <summary>
        /// Tells whether a weblog URL belongs to one of the hosting sites of interest.
        /// </summary>
        /// <param name="url">The weblog URL; may be null or empty.</param>
        /// <returns>
        /// true when <see cref="GetSite"/> recognises the URL; otherwise false.
        /// </returns>
        private static bool IsMatch(string url) {
            // Defined through GetSite so the filter and the grouping key cannot drift apart.
            return GetSite(url).Length != 0;
        }
    }
}
tags: linqtoxml, xlinq, linq, scoble