|
| 1 | +require 'rss' |
| 2 | +require 'net/http' |
| 3 | +require 'uri' |
| 4 | +require 'yaml' |
| 5 | +require 'time' |
| 6 | +require 'active_support/broadcast_logger' |
| 7 | + |
# Downloads the body of an HTTP(S) resource.
#
# Only absolute http:// and https:// URLs with a host are accepted; anything
# else (local paths, file://, ftp://, host-less URLs) is rejected before any
# connection is attempted.
#
# @param url [String] absolute HTTP or HTTPS URL
# @return [String] body of a successful (2xx) response
# @raise [RuntimeError] if +url+ is not an HTTP(S) URL with a host
# @raise [URI::InvalidURIError] if +url+ cannot be parsed at all
# @raise [Net::HTTPClientException, Net::HTTPFatalError, Net::HTTPRetriableError]
#   if the server answers with a non-2xx status
def safe_open(url)
  uri = URI.parse(url)
  # URI::HTTPS is a subclass of URI::HTTP, so a single check covers both.
  # A missing host would make Net::HTTP fall back to localhost, so reject it too.
  raise "不正なURLです: #{url}" unless uri.is_a?(URI::HTTP) && !uri.host.to_s.empty?

  Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http|
    response = http.request(Net::HTTP::Get.new(uri))
    # Raise on non-2xx so an error page is never returned as feed content.
    response.value
    response.body
  end
end
| 18 | + |
# news:fetch
#
# Fetches the configured RSS feeds, merges their items into the entries already
# stored in db/news.yml (matched by URL), assigns sequential ids to items that
# were not stored before, and rewrites the file sorted by publication date,
# newest first.
#
# A feed that fails to download or parse is logged as a warning and skipped;
# the remaining feeds are still processed.
namespace :news do
  desc 'RSS フィードから最新ニュースを取得し、db/news.yml に書き出す'
  task fetch: :environment do
    # Logger setup: write to both the log file and the console.
    file_logger = ActiveSupport::Logger.new('log/news.log')
    console     = ActiveSupport::Logger.new(STDOUT)
    logger      = ActiveSupport::BroadcastLogger.new(file_logger, console)

    logger.info('==== START news:fetch ====')

    # Load the existing news.yml. An empty file parses to nil/false, and a
    # malformed one may not be a Hash; both are treated as "no entries yet".
    yaml_path = Rails.root.join('db', 'news.yml')
    loaded = if File.exist?(yaml_path)
               YAML.safe_load(File.read(yaml_path), permitted_classes: [Time], aliases: true)
             end
    existing_news = loaded.is_a?(Hash) ? (loaded['news'] || []) : []

    # Test/staging read a sample fixture; other environments hit the real feed.
    feed_urls = if Rails.env.test? || Rails.env.staging?
                  [Rails.root.join('spec', 'fixtures', 'sample_news.rss').to_s]
                else
                  [
                    'https://news.coderdojo.jp/feed/'
                    # RSS feeds of other Dojos can be added here if needed, e.g.
                    # 'https://coderdojotokyo.org/feed',
                  ]
                end

    # Sort key for an item. published_at is normally a String, but values
    # loaded from YAML may be Time objects, and a feed item without pubDate
    # yields "". Unparseable values sort as the epoch instead of aborting.
    published_time = lambda do |item|
      Time.parse(item['published_at'].to_s)
    rescue ArgumentError
      Time.at(0)
    end

    # Fetch and parse every feed.
    fetched_items = feed_urls.flat_map do |url|
      logger.info("Fetching RSS → #{url}")
      begin
        # safe_open only accepts HTTP(S) URLs, so the local fixture path used
        # in test/staging is read straight from disk.
        rss = url.match?(%r{\Ahttps?://}i) ? safe_open(url) : File.read(url)
        feed = RSS::Parser.parse(rss, false)
        # With validation off, the parser returns nil for unrecognised content.
        raise 'not a recognizable RSS feed' if feed.nil?

        feed.items.map do |item|
          {
            'url' => item.link,
            'title' => item.title,
            'published_at' => item.pubDate.to_s
          }
        end
      rescue => e
        logger.warn("⚠️ Failed to fetch #{url}: #{e.message}")
        []
      end
    end

    # The same article may appear more than once (within or across feeds);
    # keep the first occurrence so it gets a single entry and a single id.
    new_items = fetched_items.uniq { |item| item['url'] }

    # Index the stored entries by URL.
    existing_items_hash = existing_news.index_by { |item| item['url'] }

    # Split fetched items into ones already stored and brand-new ones.
    known_items, truly_new_items = new_items.partition do |item|
      existing_items_hash.key?(item['url'])
    end

    # Stored entries are refreshed with the fetched fields (id is preserved
    # because fetched items carry no 'id' key).
    updated_items = known_items.map do |item|
      existing_items_hash[item['url']].merge(item)
    end

    # Highest id currently in use.
    max_existing_id = existing_news.map { |item| item['id'].to_i }.max || 0

    # Assign ids to new items only, oldest first.
    truly_new_items_sorted = truly_new_items.sort_by { |item| published_time.call(item) }
    truly_new_items_sorted.each.with_index(max_existing_id + 1) do |item, id|
      item['id'] = id
    end

    # Stored entries that did not appear in any feed are kept untouched.
    # A Hash gives O(1) membership checks.
    updated_urls = updated_items.to_h { |item| [item['url'], true] }
    unchanged_items = existing_news.reject { |item| updated_urls.key?(item['url']) }

    # Merge everything and sort by date, newest first.
    all_items = unchanged_items + updated_items + truly_new_items_sorted
    sorted_items = all_items.sort_by { |item| published_time.call(item) }.reverse

    # Emit only the known keys, in a fixed order.
    formatted_items = sorted_items.map do |item|
      {
        'id' => item['id'],
        'url' => item['url'],
        'title' => item['title'],
        'published_at' => item['published_at']
      }
    end

    # Write to the same absolute path that was read, regardless of the
    # current working directory.
    File.open(yaml_path, 'w') do |f|
      f.write({ 'news' => formatted_items }.to_yaml)
    end

    logger.info("✅ Wrote #{sorted_items.size} items to db/news.yml (#{truly_new_items_sorted.size} new, #{updated_items.size} updated)")
    logger.info('==== END news:fetch ====')
  end
end
0 commit comments