So we wrote a simple script for this:
@Grab(group='org.codehaus.groovy.modules.http-builder', module='http-builder', version='0.5.0-RC2' )
import groovyx.net.http.HTTPBuilder
// Small script that verifies the URLs listed in a sitemap can be fetched.
// args[0] is the path to the sitemap file.

// Guard: exactly one command-line argument is expected; otherwise print usage and stop.
if (args.length != 1) {
    def usageLines = [
        'Usage\n',
        'please provide one argument, which is a filename pointing to your sitemap file',
        '\n'
    ]
    usageLines.each { line -> println line }
    return
}
// Resolve the sitemap file named on the command line; stop early if it does not exist.
def file = new File(args[0])
if (!file.exists()) {
    println "\nsorry your file was not found. Missing file is ${args[0]}\n"
    return
}
// Parse the sitemap and collect its <url> entries.
def root = new XmlSlurper().parse(file)
def urls = root.url
println "\nchecking ${urls.size()} urls\n"

// Running tallies, reported after the loop.
def success = 0
def failure = 0

// Issue one GET per <loc> value and print one result line per URL.
urls.each {
    def url = it.loc.text()
    print "success:\t"

    // A URL passes only if the response handler runs AND the status is exactly 200.
    // Previously the comparison was evaluated and its result discarded, so every
    // response that reached the handler was counted as a success.
    def ok = false
    try {
        new HTTPBuilder( url ).get( path:'' ) { response ->
            ok = (response.statusLine.statusCode == 200)
        }
    }
    catch( e ) {
        // Any exception raised while building or executing the request counts as a failure.
        ok = false
    }

    // Print and count exactly once per URL so the tallies always add up to the total.
    if (ok) {
        print "true"
        success++
    }
    else {
        print "false"
        failure++
    }
    print "\t that ${url} exists\n"
}
// Summary of the run: the counters first, then a closing line naming the sitemap file.
def reportLines = [
    "\nreport...",
    "\tsuccessful: ${success}",
    "\tfailed: ${failure}",
    "\ttotal: ${urls.size()}",
    "\nchecked ${urls.size()} urls in file ${args[0]}\n\n"
]
reportLines.each { line -> println line }
And the output looks something like this:
groovy verifyUrls.groovy Downloads/sitemap.xml
checking 45 urls
success: true that http://metacore-ucdavis.appspot.com/ exists
success: true that http://metacore-ucdavis.appspot.com/services exists
success: true that http://metacore-ucdavis.appspot.com/techno1 exists
success: true that http://metacore-ucdavis.appspot.com/techno2 exists
success: true that http://metacore-ucdavis.appspot.com/techno3 exists
success: true that http://metacore-ucdavis.appspot.com/setupx exists
success: true that http://metacore-ucdavis.appspot.com/projects exists
success: true that http://metacore-ucdavis.appspot.com/staff exists
success: true that http://metacore-ucdavis.appspot.com/login_form exists
success: true that http://metacore-ucdavis.appspot.com/join_form exists
success: true that http://metacore-ucdavis.appspot.com/mail_password_form exists
success: true that http://metacore-ucdavis.appspot.com/Members/admin exists
success: true that http://metacore-ucdavis.appspot.com/services/ exists
success: true that http://metacore-ucdavis.appspot.com/services/statistics exists
success: true that http://metacore-ucdavis.appspot.com/services/protocols exists
success: true that http://metacore-ucdavis.appspot.com/techno1/ exists
success: true that http://metacore-ucdavis.appspot.com/techno1/compounds exists
success: true that http://metacore-ucdavis.appspot.com/techno1/statistics exists
success: true that http://metacore-ucdavis.appspot.com/techno2/ exists
success: true that http://metacore-ucdavis.appspot.com/techno3/ exists
success: true that http://metacore-ucdavis.appspot.com/setupx/ exists
success: true that http://metacore-ucdavis.appspot.com/projects/ exists
success: true that http://metacore-ucdavis.appspot.com/staff/ exists
success: true that http://metacore-ucdavis.appspot.com/join_form?came_from= exists
success: true that http://metacore-ucdavis.appspot.com/index_html/view exists
success: true that http://metacore-ucdavis.appspot.com/Members/ exists
success: true that http://metacore-ucdavis.appspot.com/Members/admin/ exists
success: true that http://metacore-ucdavis.appspot.com/services/index_html/view exists
success: true that http://metacore-ucdavis.appspot.com/statistics/ exists
success: true that http://metacore-ucdavis.appspot.com/services/protocols/ exists
success: true that http://metacore-ucdavis.appspot.com/services/protocols/Metabolomics%20Vol.%201%2C%20No.%201%2C%20January%202005%20%28%202005%29.pdf exists
success: true that http://metacore-ucdavis.appspot.com/services/protocols/Proteomics%202004%2C%204%2C%2078-83.pdf exists
success: true that http://metacore-ucdavis.appspot.com/services/protocols/SulfurDeprivation.pdf exists
success: true that http://metacore-ucdavis.appspot.com/techno1/index_html/view exists
success: true that http://metacore-ucdavis.appspot.com/techno1/compounds/ exists
success: true that http://metacore-ucdavis.appspot.com/techno2/index_html/view exists
success: true that http://metacore-ucdavis.appspot.com/techno3/index_html/view exists
success: true that http://metacore-ucdavis.appspot.com/setupx/index_html/view exists
success: true that http://metacore-ucdavis.appspot.com/projects/index_html/view exists
success: true that http://metacore-ucdavis.appspot.com/staff/index_html/view exists
success: true that http://metacore-ucdavis.appspot.com/Members exists
success: true that http://metacore-ucdavis.appspot.com/Members/admin/index_html/view exists
success: true that http://metacore-ucdavis.appspot.com/statistics exists
success: true that http://metacore-ucdavis.appspot.com/statistics/index_html/view exists
success: true that http://metacore-ucdavis.appspot.com/techno1/compounds/index_html/view exists
report...
successful: 45
failed: 0
total: 45
checked 45 urls in file Downloads/sitemap.xml