#! /usr/bin/ruby
require 'rss'
require 'rss/maker'
require 'rubygems'
require 'hpricot'
require 'nkf'
require 'uconv'
require 'optparse'
# Maps the encoding constants exposed by NKF to the short labels this script
# uses to describe a file's character encoding. Frozen so the lookup table
# cannot be modified by accident at runtime.
CODES = {
  NKF::JIS     => "JIS",
  NKF::EUC     => "EUC",
  NKF::SJIS    => "SJIS",
  NKF::BINARY  => "BINARY",
  NKF::UNKNOWN => "UNKNOWN(ASCII)",
}.freeze
# Guesses the character encoding of a file's contents with NKF.guess.
#
# @param fname [String] path of the file to inspect
# @return [String] the CODES label for the guessed encoding; when NKF reports
#   an encoding that CODES does not list, the label stored for NKF::UNKNOWN
def guessEncode(fname)
  # File.read never interprets a leading "|" as a command (Kernel#open does)
  # and yields "" rather than nil for an empty file.
  str = File.read(fname)
  # Fall back instead of raising on encodings missing from the table.
  CODES.fetch(NKF.guess(str)) { CODES.fetch(NKF::UNKNOWN) }
end
# Reads every element matching ".paper" from the HTML and returns them as an
# array of hashes.
#
# fname   - path of the original input file; only its modification time is
#           used (stored under 'date').
# tmpfile - path of the file whose contents are parsed with Hpricot.
#
# Each hash carries the keys 'title', 'desc', 'link' and 'date'. When the
# global $ifencode is 'EUC' or 'SJIS', title, description and link are passed
# through the matching Uconv converter; otherwise they are stored unchanged.
def readHTML(fname, tmpfile)
  source_stat = File.stat(fname)
  document = Hpricot(File.read(tmpfile))
  entries = []

  (document/".paper").each do |node|
    # Title with the first "[digits]" marker removed and the first leading /
    # trailing whitespace run removed.
    bare_title = node.at("[@class='title']").inner_text
    bare_title = bare_title.sub(/\[\d+?\]/, "").sub(/^\s+/, "").sub(/\s+$/, "")
    publication = node.at("[@class='publication']").inner_text
    heading = "\"#{bare_title}\":\t#{publication}"

    author = node.at("[@class='author']").inner_text
    body = "Author: #{author}\nPublication: #{publication}\n"
    body += node.at("[@class='description']").inner_text

    # The href of the last <a> found inside the "link" element, if any.
    link_url = nil
    link_tag = node.at("[@class='link']")
    unless link_tag.nil?
      link_tag.search("a") { |anchor| link_url = anchor['href'] }
    end

    case $ifencode
    when 'EUC'
      heading = Uconv.euctou8(heading)
      body = Uconv.euctou8(body)
      link_url = Uconv.euctou8(link_url) unless link_url.nil?
    when 'SJIS'
      heading = Uconv.sjistou8(heading)
      body = Uconv.sjistou8(body)
      link_url = Uconv.sjistou8(link_url) unless link_url.nil?
    end

    entries << {
      'title' => heading,
      'desc'  => body,
      'link'  => link_url,
      'date'  => source_stat.mtime,
    }
  end

  entries
end
# Parses the file at +fname+ with RSS::Parser, passing false as the second
# argument (validation flag), and returns the parser's result.
#
# No exception is rescued here: the original carried a commented-out
# `rescue RSS::Error` that would have reported that the file is not
# RSS 0.9x/1.0/2.0 or Atom 1.0, so parser errors propagate to the caller.
def readRSS(fname)
  source = File.read(fname)
  RSS::Parser.parse(source, false)
end
# Merges +papers+ into the items of +rss+ and returns +rss+.
#
# rss    - an object whose #items collection holds entries responding to
#          title/description/link/date accessors.
# papers - array of hashes with the keys 'title', 'desc', 'link', 'date'.
#
# For each paper every existing item is inspected:
#   * same title, different description -> description, link, date replaced
#   * same description, different title -> title, link, date replaced
#   * both equal                        -> left untouched
# A paper that matched no item by title or description is appended as a new
# RSS::RDF::Item.
def update(rss, papers)
  papers.each do |paper|
    matched = false

    rss.items.each do |entry|
      same_title = entry.title == paper['title']
      same_desc = entry.description == paper['desc']
      next unless same_title || same_desc

      matched = true
      next if same_title && same_desc

      # Exactly one of the two fields differs: bring it in line with the paper.
      if same_title
        entry.description = paper['desc']
      else
        entry.title = paper['title']
      end
      entry.link = paper['link']
      entry.date = paper['date']
    end

    unless matched
      fresh = RSS::RDF::Item.new
      fresh.title = paper['title']
      fresh.link = paper['link'] unless paper['link'].nil?
      fresh.description = paper['desc']
      fresh.date = paper['date']
      rss.items << fresh
    end
  end

  rss
end
# Builds a new RSS 2.0 feed with RSS::Maker and returns the maker's result.
#
# rssuri      - accepted for interface compatibility; not used by this method.
# name        - channel title.
# description - channel description.
# link        - channel link.
# paperList   - array of hashes with the keys 'title', 'link', 'desc', 'date';
#               one feed item is created per hash ('link' may be nil).
#
# The feed encoding is set to "UTF-8" and the maker's do_sort flag is enabled.
def mkRSS(rssuri, name, description, link, paperList)
  RSS::Maker.make("2.0") do |maker|
    maker.encoding = "UTF-8"

    channel = maker.channel
    channel.title = name
    channel.description = description
    channel.link = link

    entries = maker.items
    entries.do_sort = true

    # One new_item call per paper.
    paperList.each do |paper|
      entries.new_item do |entry|
        entry.title = paper['title']
        entry.link = paper['link']
        entry.description = paper['desc']
        entry.date = paper['date']
      end
    end
  end
end
# Rebuilds +rss+ as an RSS 2.0 feed with RSS::Maker and returns the maker's
# result.
#
# rss - a parsed feed; its channel title/description/link and each item's
#       title/link/description/date are copied into the new feed.
#
# The feed encoding is set to "UTF-8" and the maker's do_sort flag is enabled.
def rmkRSS(rss)
  RSS::Maker.make("2.0") do |maker|
    maker.encoding = "UTF-8"

    source_channel = rss.channel
    channel = maker.channel
    channel.title = source_channel.title
    channel.description = source_channel.description
    channel.link = source_channel.link

    entries = maker.items
    entries.do_sort = true

    # One new_item call per existing item.
    rss.items.each do |existing|
      entries.new_item do |entry|
        entry.title = existing.title
        entry.link = existing.link
        entry.description = existing.description
        entry.date = existing.date
      end
    end
  end
end
# Script entry point.
#
# Usage (as read from the argument handling below):
#   script [-update] INPUT_HTML RSS_FILE            # with -update
#   script INPUT_HTML TITLE DESCRIPTION LINK        # without -update
#
# With -update the existing RSS_FILE is read, merged with the papers found in
# INPUT_HTML and rewritten in place; otherwise a new feed is printed to stdout.
begin
  # Option setup.
  # NOTE(review): OptionParser appears to treat "-update" as the short option
  # -u with an attached argument; confirm before changing the spelling.
  updatep = false
  opts = OptionParser.new
  opts.on("-update") { updatep = true }
  opts.parse!(ARGV)
  inputFilename = ARGV.shift

  # Re-encode the input line by line with NKF into a temporary file.
  # NOTE(review): the original comment said the conversion target is UTF-8,
  # but the flags passed to NKF are "-sjis"; readHTML later converts to UTF-8
  # with Uconv according to the encoding guessed from the temporary file.
  tmpfilename = "tmp_SPRSS#{Time.now.strftime("%y%m%d-%H%M%S")}.txt"
  File.open(tmpfilename, "w") do |tmpfile|
    File.open(inputFilename) do |file|
      file.each_line { |line| tmpfile.print NKF.nkf("-sjis", line) }
    end
  end

  $ifencode = guessEncode(tmpfilename)
  paperList = readHTML(inputFilename, tmpfilename)

  if updatep
    rdfFilename = ARGV.shift
    rss = readRSS(rdfFilename)
    rdf = update(rss, paperList)
    puts rdf.items.length
    # Render the feed before opening the target for writing, so a failure
    # while rendering cannot leave the existing RSS file truncated.
    output = rmkRSS(rdf).to_s
    File.open(rdfFilename, "w") { |ofile| ofile.puts output }
  else
    title = ARGV.shift
    desc = ARGV.shift
    uri = ARGV.shift
    puts mkRSS('', title, desc, uri, paperList).to_s
  end
ensure
  # Remove the temporary file even when a step above raised.
  File.delete(tmpfilename) if tmpfilename && File.exist?(tmpfilename)
end