#!/usr/bin/ruby
#
# Reads spreadsheets in the XML Spreadsheet format (namespace
# urn:schemas-microsoft-com:office:spreadsheet) and writes one metadata
# file "md<fid>.jmd" per row of the "FAX" worksheet into the current
# directory.
#
# Usage: pass one or more spreadsheet file names on the command line.

require 'rexml/document'

# Converter from the "area" and "FAX" worksheets of a workbook to
# metadata XML documents.
class App
  # Prefix => namespace URI map handed to every XPath query.
  NAMESPACES = {
    "x" => "urn:schemas-microsoft-com:office:excel",
    "ss" => "urn:schemas-microsoft-com:office:spreadsheet"
  }.freeze

  # Default namespace of the generated <metadata> root element.
  JMD_NAMESPACE = "http://www.gisc.kishou.go.jp/xsd/jmd0.1".freeze

  # Columns (besides STAMP) that must be non-blank for a row to be converted.
  REQUIRED_KEYS = %w[URL Theme Level TFcst Area.Desc North].freeze

  def initialize
    @ns = NAMESPACES
  end

  # Converts every file named in +args+.
  #
  # @param args [Array<String>] paths of workbook files
  # @return [Array<String>] +args+
  def run(args)
    args.each { |file| convert(file) }
  end

  # Yields each data row of worksheet +sheet+ as a Hash mapping the header
  # text (taken from the first row) to the cell text.
  #
  # Header and data cells are located the same way: a cell carrying an
  # ss:Index attribute jumps to that 1-based column, any other cell takes
  # the column after the previous one.  Data cells that fall outside the
  # header, or whose ss:Index is below 1, are skipped.
  #
  # @param doc [REXML::Document] parsed workbook
  # @param sheet [String] value of the worksheet's ss:Name attribute
  # @yieldparam row [Hash{String=>String}]
  # @return [nil]
  # @raise [RuntimeError] if the workbook has no worksheet named +sheet+
  def parsehash(doc, sheet)
    worksheet = REXML::XPath.first(
      doc, %(/ss:Workbook/ss:Worksheet[@ss:Name="#{sheet}"]), @ns
    )
    raise "worksheet #{sheet.inspect} not found" unless worksheet

    cols = []
    header = REXML::XPath.first(worksheet, "ss:Table/ss:Row[1]", @ns)
    each_cell_text(header) { |column, text| cols[column - 1] = text } if header

    data_rows = REXML::XPath.match(worksheet, "ss:Table/ss:Row[position()>1]", @ns)
    data_rows.each do |rownode|
      row = {}
      each_cell_text(rownode) do |column, text|
        name = cols[column - 1]
        row[name] = text unless name.nil?
      end
      yield row
    end
    nil
  end

  # Builds the metadata XML document for one metadata hash.
  #
  # Every key becomes a child element of <metadata>; an Array value produces
  # one element per item and a nil value produces none.  Elements whose name
  # ends in "bc" get units="deg"; "avail" and "maxftime" get units="h".
  #
  # @param md [Hash{String=>String,Array<String>}]
  # @return [REXML::Document]
  def serialize(md)
    doc = REXML::Document.new
    doc << REXML::XMLDecl.new('1.0', 'UTF-8')
    root = doc.add_element('metadata')
    root.add_namespace(JMD_NAMESPACE)
    md.each do |key, val|
      Array(val).each do |sval|
        node = root.add_element(key)
        node.add_text(sval.to_s)
        case key
        when /bc\z/ then node.add_attribute('units', 'deg')
        when /\A(avail|maxftime)\z/ then node.add_attribute('units', 'h')
        end
      end
    end
    doc
  end

  # Turns one merged FAX/area row into a metadata hash for #serialize.
  #
  # Side effect: stores the file identifier (the sorted STAMP words joined
  # with "-") in row['*fid'].
  #
  # Rejects the row with throw(:ng, message) when STAMP or any of
  # REQUIRED_KEYS is blank, so callers must wrap the call in catch(:ng).
  #
  # @param row [Hash{String=>String}]
  # @return [Hash{String=>String,Array<String>}]
  def convert_row(row)
    throw(:ng, "STAMP undefined") if row['STAMP'].to_s.strip.empty?
    stamps = words(row['STAMP'])
    row['*fid'] = fid = stamps.sort.join('-')
    REQUIRED_KEYS.each do |key|
      throw(:ng, "#{fid}: #{key} undefined") if row[key].to_s.strip.empty?
    end

    # Every title component is non-blank here thanks to the checks above.
    # Joining the words (instead of replacing each whitespace character)
    # keeps runs of blanks from producing "A and  and B" or a leading "/".
    title = "Fax chart #{stamps.join(' and ')}" \
            " - #{slash_list(row['Theme'])}" \
            " on #{slash_list(row['Level'])}" \
            " at #{slash_list(row['TFcst'])}" \
            " for #{row['Area.Desc'].strip}"

    urls = words(row['URL'])
    # File extension of each URL, upper-cased; "html" is reported as PNG.
    fmt = urls.map { |u| u.sub(/.*\./, '').sub(/html/, 'png').upcase }.uniq.sort

    manual = row['Manual'].to_s
    res = "Automated black-and-white chart"
    # Block form so backslashes in the cell are not read as back-references.
    res = res.sub(/Automated/) { manual } unless manual.empty?

    # Longest forecast time; "Analysis"/"Observation" count as 0 and a
    # trailing "_h" is dropped before the integer conversion.
    max_ftime = words(row['TFcst']).map { |s|
      s.sub(/(Analysis|Observation)/, '0').sub(/_h/, '').to_i
    }.max

    {
      'mdfid' => "urn:x-wmo:md:jp.go.jma.wis.dcpc-geogr::FAX-#{fid}",
      'wisorg' => "DCPC/RSMC-Geographical Tokyo, Japan Meteorological Agency",
      'wiscont' => 'mailto:wis-jma@met.kishou.go.jp',
      'mddate' => Time.now.utc.strftime('%Y-%m-%d'),
      'subjkey' => 'synopticMeteorology',
      'topicat' => 'climatologyMeteorologyAtmosphere',
      'title' => title,
      # NOTE(review): the reviewed copy of this file was truncated between
      # the start of the 'abstract' heredoc and the value that lists the
      # Theme words, so the original abstract wording and the name of the
      # following key are not known.  `res` was the only local left unused,
      # so it is presumed to belong to the abstract; 'themekey' is assumed by
      # analogy with 'stratkey'/'placekey'.  TODO: restore both from history.
      'abstract' => res,
      'themekey' => words(row['Theme']),
      # Only North is validated above; a blank South/West/East becomes 0.0.
      'southbc' => '%3.1f' % row['South'].to_f,
      'northbc' => '%3.1f' % row['North'].to_f,
      'westbc' => '%3.1f' % row['West'].to_f,
      'eastbc' => '%3.1f' % row['East'].to_f,
      'stratkey' => words(row['Level']),
      'avail' => '24',
      'maxftime' => max_ftime.to_s,
      'updcycle' => 'daily',
      'placekey' => row['Area.Desc'].strip.gsub(/\s/, '_'),
      'format' => fmt,
      'url' => urls,
    }
  end

  # Converts one workbook: indexes the "area" worksheet by its Area column,
  # then for every "FAX" row merges in the matching area row and writes
  # "md<fid>.jmd".  Rows with an unknown area or missing required columns
  # are reported on standard output and skipped.
  #
  # @param file [String] path of the workbook
  # @return [nil]
  def convert(file)
    # Block form closes the input as soon as the document has been parsed.
    doc = File.open(file, 'r') { |ifp| REXML::Document.new(ifp) }
    areatab = {}
    parsehash(doc, 'area') { |row| areatab[row['Area']] = row }
    parsehash(doc, 'FAX') do |row|
      area = row['Area']
      unless areatab[area]
        puts "area #{area} undefined"
        next
      end
      row.update(areatab[area])
      msg = catch(:ng) do
        md = convert_row(row)
        fid = row['*fid']
        puts fid
        jmd = serialize(md)
        File.open("md#{fid}.jmd", 'w') { |ofp| jmd.write(ofp, 1, true) }
        nil
      end
      puts msg if msg
    end
  end

  private

  # Yields [1-based column, unescaped text] for each ss:Cell of +rownode+.
  # A cell without any text yields "".
  def each_cell_text(rownode)
    column = 0
    REXML::XPath.match(rownode, "ss:Cell", @ns).each do |cellnode|
      index_attr = REXML::XPath.first(cellnode, '@ss:Index', @ns)
      column = index_attr ? index_attr.value.to_i : column + 1
      next if column < 1 # a negative array index would wrap to the last column

      text = REXML::XPath.first(cellnode, 'ss:Data//text()', @ns)
      yield column, REXML::Text.unnormalize(text.to_s)
    end
  end

  # Splits a cell value into its whitespace-separated words.
  def words(value)
    value.to_s.strip.split(/\s+/)
  end

  # Words of +value+ with "_" shown as a blank, joined by "/".
  def slash_list(value)
    words(value).map { |w| w.gsub(/_/, ' ') }.join('/')
  end
end

App.new.run(ARGV)