#!/usr/bin/ruby
# frozen_string_literal: true

# Streams XML metadata files named on the command line to standard output,
# dropping every element subtree whose tag name matches one of
# MDStream::PATS. Everything else is re-emitted with attribute values and
# text re-escaped.

require 'rexml/document'
require 'rexml/streamlistener'

class String
  # Replacement table for the five characters that entitize escapes.
  XML_ENTITIES = {
    '&' => '&amp;',
    '"' => '&quot;',
    "'" => '&apos;',
    '<' => '&lt;',
    '>' => '&gt;'
  }.freeze

  # Returns a copy of the string with &, ", ', < and > replaced by the
  # corresponding predefined XML entities.
  #
  # @return [String]
  def entitize
    gsub(/[&'"<>]/, XML_ENTITIES)
  end
end

# REXML stream listener that prints the elements and text it receives,
# except for any subtree rooted at an element whose name matches PATS.
class MDStream
  include REXML::StreamListener

  # Tag names (with an optional "gmd:" prefix) whose whole subtree is
  # suppressed from the output.
  PATS = [
    # /\A([A-Za-z]+:)?pointOfContact\z/,
    /\A(gmd:)?contactInfo\z/,
    /\A(gmd:)?referenceSystemInfo\z/,
    /\A(gmd:)?metadataConstraints\z/,
    /\A(gmd:)?contentInfo\z/,
    /\A(gmd:)?qualityInfo\z/
  ].freeze

  def initialize
    @mode = nil # pattern of the subtree currently being suppressed, or nil
    @depth = 0  # open elements matching @mode inside the suppressed subtree
  end

  # Handles an opening tag: prints it with its attributes, or starts or
  # continues suppression.
  #
  # @param n [String] element name
  # @param a [Hash, Array] attribute name/value pairs
  def tag_start(n, a)
    if @mode
      # Already suppressing: only count same-pattern nesting so the matching
      # end tag of the outermost element is the one that ends suppression.
      # Other patterns are deliberately not consulted here; switching @mode
      # mid-subtree would end suppression too early.
      @depth += 1 if @mode.match?(n)
      return
    end

    @mode = PATS.find { |pat| pat.match?(n) }
    if @mode
      @depth = 1
      return
    end

    parts = ["<#{n}"]
    parts.concat(a.map { |k, v| "#{k}=\"#{v.entitize}\"" })
    parts.push '>'
    print parts.join(' ')
  end

  # Prints character data with special characters escaped, unless it lies
  # inside a suppressed subtree.
  #
  # @param t [String] text content
  def text(t)
    return if @mode

    print t.entitize
  end

  # Handles a closing tag: prints it, or closes one nesting level of the
  # suppressed subtree and ends suppression when the outermost level closes.
  #
  # @param n [String] element name
  def tag_end(n)
    unless @mode
      print "</#{n}>"
      return
    end
    return unless @mode.match?(n)

    @depth -= 1
    @mode = nil if @depth.zero?
  end
end

listener = MDStream.new
ARGV.each do |file|
  # Block form closes each input file once it has been parsed.
  File.open(file) { |source| REXML::Document.parse_stream(source, listener) }
end