{"id":1295,"date":"2025-03-24T08:52:25","date_gmt":"2025-03-23T23:52:25","guid":{"rendered":"https:\/\/dexall.co.jp\/articles\/?p=1295"},"modified":"2025-03-24T08:52:25","modified_gmt":"2025-03-23T23:52:25","slug":"%e3%80%90%e4%bf%9d%e5%ad%98%e7%89%88%e3%80%91ruby%e3%81%a7html%e6%93%8d%e4%bd%9c%e3%82%92%e5%ae%8c%e5%85%a8%e3%83%9e%e3%82%b9%e3%82%bf%e3%83%bc%ef%bc%81%e5%ae%9f%e8%b7%b5%e7%9a%84%e3%81%aa7%e3%81%a4","status":"publish","type":"post","link":"https:\/\/dexall.co.jp\/articles\/?p=1295","title":{"rendered":"\u3010\u4fdd\u5b58\u7248\u3011Ruby\u3067HTML\u64cd\u4f5c\u3092\u5b8c\u5168\u30de\u30b9\u30bf\u30fc\uff01\u5b9f\u8df5\u7684\u306a7\u3064\u306e\u57fa\u790e\u30c6\u30af\u30cb\u30c3\u30af"},"content":{"rendered":"\n<div class=\"toc\">\n    <div id=\"toc_container\" class=\"sgb-toc--bullets js-smooth-scroll\" data-dialog-title=\"\u76ee\u6b21\">\n      <p class=\"toc_title\">\u76ee\u6b21 <\/p>\n      <ul class=\"toc_list\">  <li class=\"first\">    <a href=\"#i-0\">Ruby\u3067HTML\u3092\u6271\u3046\u30e1\u30ea\u30c3\u30c8<\/a>    <ul class=\"menu_level_1\">      <li class=\"first\">        <a href=\"#i-1\">Web\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u306e\u52b9\u7387\u5316\u3067\u5de5\u6570\u309250%\u524a\u6e1b<\/a>      <\/li>      <li class=\"last\">        <a 
href=\"#i-2\">\u81ea\u52d5\u5316\u306b\u3088\u308b\u4eba\u7684\u30df\u30b9\u306e\u30bc\u30ed\u5316\u3092\u5b9f\u73fe<\/a>      <\/li>    <\/ul>  <\/li>  <li>    <a href=\"#i-3\">Ruby\u3067HTML\u3092\u64cd\u4f5c\u3059\u308b\u57fa\u672c\u30c6\u30af\u30cb\u30c3\u30af<\/a>    <ul class=\"menu_level_1\">      <li class=\"first\">        <a href=\"#i-4\">Nokogiri gem\u3067HTML\u3092\u30d1\u30fc\u30b9\u3059\u308b\u65b9\u6cd5<\/a>      <\/li>      <li>        <a href=\"#i-5\">ERB\u30c6\u30f3\u30d7\u30ec\u30fc\u30c8\u3067HTML\u3092\u751f\u6210\u3059\u308b\u30d9\u30b9\u30c8\u30d7\u30e9\u30af\u30c6\u30a3\u30b9<\/a>      <\/li>      <li class=\"last\">        <a href=\"#i-6\">CGI\u30e9\u30a4\u30d6\u30e9\u30ea\u3092\u4f7f\u3063\u305fHTML\u64cd\u4f5c\u306e\u57fa\u790e<\/a>      <\/li>    <\/ul>  <\/li>  <li>    <a href=\"#i-7\">\u5b9f\u8df5\u7684\u306aHTML\u64cd\u4f5c\u30d7\u30ed\u30b0\u30e9\u30df\u30f3\u30b0<\/a>    <ul class=\"menu_level_1\">      <li class=\"first\">        <a href=\"#i-8\">\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u30b9\u30af\u30ea\u30d7\u30c8\u306e\u4f5c\u6210\u624b\u9806<\/a>      <\/li>      <li>        <a href=\"#i-9\">\u52d5\u7684\u306aWeb\u30da\u30fc\u30b8\u306e\u751f\u6210\u30c6\u30af\u30cb\u30c3\u30af<\/a>      <\/li>      <li class=\"last\">        <a href=\"#i-10\">\u30d5\u30a9\u30fc\u30e0\u51e6\u7406\u306e\u5b9f\u88c5\u65b9\u6cd5<\/a>      <\/li>    <\/ul>  <\/li>  <li>    <a href=\"#i-11\">HTML\u64cd\u4f5c\u6642\u306e\u6ce8\u610f\u70b9\u3068\u30c8\u30e9\u30d6\u30eb\u30b7\u30e5\u30fc\u30c6\u30a3\u30f3\u30b0<\/a>    <ul class=\"menu_level_1\">      <li class=\"first\">        <a href=\"#i-12\">\u6587\u5b57\u30a8\u30f3\u30b3\u30fc\u30c7\u30a3\u30f3\u30b0\u306e\u9069\u5207\u306a\u51e6\u7406\u65b9\u6cd5<\/a>      <\/li>      <li>        <a href=\"#i-13\">\u30e1\u30e2\u30ea\u4f7f\u7528\u91cf\u306e\u6700\u9069\u5316\u30c6\u30af\u30cb\u30c3\u30af<\/a>      <\/li>      <li class=\"last\">        <a 
href=\"#i-14\">\u30bb\u30ad\u30e5\u30ea\u30c6\u30a3\u5bfe\u7b56\u306e\u5b9f\u88c5\u30dd\u30a4\u30f3\u30c8<\/a>      <\/li>    <\/ul>  <\/li>  <li class=\"last\">    <a href=\"#i-15\">\u5b9f\u52d9\u3067\u4f7f\u3048\u308b\u30b3\u30fc\u30c9\u4f8b\u3068\u30b5\u30f3\u30d7\u30eb<\/a>    <ul class=\"menu_level_1\">      <li class=\"first\">        <a href=\"#i-16\">HTML\u30d1\u30fc\u30b5\u30fc\u306e\u5b9f\u88c5\u4f8b<\/a>      <\/li>      <li>        <a href=\"#i-17\">\u30c6\u30f3\u30d7\u30ec\u30fc\u30c8\u30a8\u30f3\u30b8\u30f3\u306e\u6d3b\u7528\u4f8b<\/a>      <\/li>      <li class=\"last\">        <a href=\"#i-18\">\u5b9f\u7528\u7684\u306a\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u30b3\u30fc\u30c9<\/a>      <\/li>    <\/ul>  <\/li><\/ul>\n      <a href=\"#\" class=\"sgb-toc-button js-toc-button\" rel=\"nofollow\" data-open-dialog=\"true\"><i class=\"fa fa-list\"><\/i><span class=\"sgb-toc-button__text\">\u76ee\u6b21\u3078<\/span><\/a>\n    <\/div><\/div><h2 class=\"wp-block-heading\" id=\"i-0\">Ruby\u3067HTML\u3092\u6271\u3046\u30e1\u30ea\u30c3\u30c8<\/h2>\n\n\n\n<p class=\"wp-block-paragraph\">Ruby\u3067HTML\u3092\u64cd\u4f5c\u3059\u308b\u3053\u3068\u306f\u3001Web\u30a2\u30d7\u30ea\u30b1\u30fc\u30b7\u30e7\u30f3\u958b\u767a\u3084\u30c7\u30fc\u30bf\u53ce\u96c6\u306e\u73fe\u5834\u3067\u5927\u304d\u306a\u4fa1\u5024\u3092\u751f\u307f\u51fa\u3057\u307e\u3059\u3002\u5177\u4f53\u7684\u306a\u30e1\u30ea\u30c3\u30c8\u3092\u3001\u5b9f\u969b\u306e\u696d\u52d9\u30b7\u30fc\u30f3\u306b\u57fa\u3065\u3044\u3066\u89e3\u8aac\u3057\u3066\u3044\u304d\u307e\u3057\u3087\u3046\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-1\">Web\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u306e\u52b9\u7387\u5316\u3067\u5de5\u6570\u309250%\u524a\u6e1b<\/h3>\n\n\n\n<p 
class=\"wp-block-paragraph\">Ruby\u3092\u4f7f\u7528\u3057\u305fHTML\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u306f\u3001\u30c7\u30fc\u30bf\u53ce\u96c6\u4f5c\u696d\u3092\u5287\u7684\u306b\u52b9\u7387\u5316\u3057\u307e\u3059\u3002\u4ee5\u4e0b\u306e\u3088\u3046\u306a\u5177\u4f53\u7684\u306a\u30e1\u30ea\u30c3\u30c8\u304c\u3042\u308a\u307e\u3059\uff1a<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li>\u81ea\u52d5\u5316\u306b\u3088\u308b\u4f5c\u696d\u6642\u9593\u306e\u5927\u5e45\u524a\u6e1b<\/li>\n<\/ol>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u624b\u52d5\u53ce\u96c6\uff1a1\u65e5\u3042\u305f\u308a8\u6642\u9593<\/li>\n\n\n\n<li>Ruby\u81ea\u52d5\u5316\u5f8c\uff1a1\u65e5\u3042\u305f\u308a4\u6642\u9593\u4ee5\u4e0b<\/li>\n\n\n\n<li>\u52b9\u7387\u5316\u7387\uff1a\u7d0450%\u4ee5\u4e0a<\/li>\n<\/ul>\n\n\n\n<ol start=\"2\" class=\"wp-block-list\">\n<li>\u5927\u91cf\u30c7\u30fc\u30bf\u306e\u9ad8\u901f\u51e6\u7406<\/li>\n<\/ol>\n\n\n\n<ul class=\"wp-block-list\">\n<li>1\u79d2\u3042\u305f\u308a100\u4ef6\u4ee5\u4e0a\u306e\u30da\u30fc\u30b8\u51e6\u7406\u304c\u53ef\u80fd<\/li>\n\n\n\n<li>\u4e26\u5217\u51e6\u7406\u306b\u3088\u308b\u66f4\u306a\u308b\u9ad8\u901f\u5316<\/li>\n\n\n\n<li>\u30e1\u30e2\u30ea\u52b9\u7387\u306e\u826f\u3044\u6bb5\u968e\u7684\u51e6\u7406<\/li>\n<\/ul>\n\n\n\n<ol start=\"3\" class=\"wp-block-list\">\n<li>\u67d4\u8edf\u306a\u30c7\u30fc\u30bf\u62bd\u51fa<\/li>\n<\/ol>\n\n\n\n<ul class=\"wp-block-list\">\n<li>CSS \u30bb\u30ec\u30af\u30bf\u306b\u3088\u308b\u76f4\u611f\u7684\u306a\u8981\u7d20\u6307\u5b9a<\/li>\n\n\n\n<li>XPath \u306b\u3088\u308b\u8907\u96d1\u306a\u6761\u4ef6\u6307\u5b9a<\/li>\n\n\n\n<li>\u6b63\u898f\u8868\u73fe\u3092\u4f7f\u7528\u3057\u305f\u9ad8\u5ea6\u306a\u30d1\u30bf\u30fc\u30f3\u30de\u30c3\u30c1\u30f3\u30b0<\/li>\n<\/ul>\n\n\n\n<p class=\"wp-block-paragraph\">\u5b9f\u969b\u306e\u6d3b\u7528\u4f8b\uff1a<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" 
data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">require 'nokogiri'\nrequire 'open-uri'\n\n# Web\u30da\u30fc\u30b8\u304b\u3089\u7279\u5b9a\u306e\u60c5\u5831\u3092\u62bd\u51fa\u3059\u308b\u4f8b\nurl = \"https:\/\/example.com\/data\"\ndoc = Nokogiri::HTML(URI.open(url))\n\n# CSS \u30bb\u30ec\u30af\u30bf\u3067\u8981\u7d20\u3092\u62bd\u51fa\nprices = doc.css('.price').map(&amp;:text)<\/pre>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-2\">\u81ea\u52d5\u5316\u306b\u3088\u308b\u4eba\u7684\u30df\u30b9\u306e\u30bc\u30ed\u5316\u3092\u5b9f\u73fe<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">HTML\u306e\u624b\u52d5\u64cd\u4f5c\u3067\u767a\u751f\u3057\u304c\u3061\u306a\u4eba\u7684\u30df\u30b9\u3092\u3001Ruby\u306b\u3088\u308b\u81ea\u52d5\u5316\u3067\u78ba\u5b9f\u306b\u9632\u6b62\u3067\u304d\u307e\u3059\uff1a<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li>\u30c7\u30fc\u30bf\u53ce\u96c6\u6642\u306e\u30df\u30b9\u524a\u6e1b\u52b9\u679c<\/li>\n<\/ol>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u30bf\u30a4\u30d7\u30df\u30b9\uff1a100%\u524a\u6e1b<\/li>\n\n\n\n<li>\u30c7\u30fc\u30bf\u306e\u6b20\u843d\uff1a98%\u524a\u6e1b<\/li>\n\n\n\n<li>\u30d5\u30a9\u30fc\u30de\u30c3\u30c8\u306e\u4e0d\u7d71\u4e00\uff1a100%\u524a\u6e1b<\/li>\n<\/ul>\n\n\n\n<ol start=\"2\" class=\"wp-block-list\">\n<li>\u54c1\u8cea\u7ba1\u7406\u306e\u5411\u4e0a<\/li>\n<\/ol>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u4e00\u8cab\u6027\u306e\u3042\u308b\u51e6\u7406<\/li>\n\n\n\n<li>\u30c7\u30fc\u30bf\u691c\u8a3c\u306e\u81ea\u52d5\u5316<\/li>\n\n\n\n<li>\u30a8\u30e9\u30fc\u30ed\u30b0\u306e\u81ea\u52d5\u8a18\u9332<\/li>\n<\/ul>\n\n\n\n<ol start=\"3\" class=\"wp-block-list\">\n<li>\u4f5c\u696d\u306e\u6a19\u6e96\u5316<\/li>\n<\/ol>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u51e6\u7406\u624b\u9806\u306e\u660e\u78ba\u5316<\/li>\n\n\n\n<li>\u518d\u73fe\u6027\u306e\u78ba\u4fdd<\/li>\n\n\n\n<li>\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u5316\u306e\u5bb9\u6613\u3055<\/li>\n<\/ul>\n\n\n\n<p 
class=\"wp-block-paragraph\">\u5b9f\u88c5\u4f8b\uff1a<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">def validate_html_content(content)\n  begin\n    # HTML\u306e\u69cb\u6587\u30c1\u30a7\u30c3\u30af\n    doc = Nokogiri::HTML(content) { |config| config.strict }\n\n    # \u5fc5\u8981\u306a\u8981\u7d20\u306e\u5b58\u5728\u78ba\u8a8d\n    required_elements = doc.css('title, meta[description], h1')\n    raise \"\u5fc5\u9808\u8981\u7d20\u304c\u4e0d\u8db3\u3057\u3066\u3044\u307e\u3059\" if required_elements.empty?\n\n    # \u30c7\u30fc\u30bf\u306e\u5f62\u5f0f\u78ba\u8a8d\n    prices = doc.css('.price').map(&amp;:text)\n    prices.each do |price|\n      raise \"\u4fa1\u683c\u30d5\u30a9\u30fc\u30de\u30c3\u30c8\u304c\u4e0d\u6b63\u3067\u3059\" unless price.match?(\/^\\d+\u5186$\/)\n    end\n\n    true\n  rescue =&gt; e\n    logger.error \"\u30d0\u30ea\u30c7\u30fc\u30b7\u30e7\u30f3\u30a8\u30e9\u30fc: #{e.message}\"\n    false\n  end\nend<\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u3053\u306e\u3088\u3046\u306b\u3001Ruby\u3067HTML\u3092\u6271\u3046\u3053\u3068\u3067\u3001\u4f5c\u696d\u52b9\u7387\u306e\u5411\u4e0a\u3060\u3051\u3067\u306a\u304f\u3001\u54c1\u8cea\u306e\u5411\u4e0a\u3082\u5b9f\u73fe\u3067\u304d\u307e\u3059\u3002\u7279\u306b\u5927\u898f\u6a21\u306a\u30c7\u30fc\u30bf\u51e6\u7406\u3084\u7e70\u308a\u8fd4\u3057\u4f5c\u696d\u304c\u5fc5\u8981\u306a\u5834\u9762\u3067\u306f\u3001\u305d\u306e\u52b9\u679c\u306f\u9855\u8457\u3067\u3059\u3002<\/p>\n\n\n\n<p 
class=\"wp-block-paragraph\">\u6b21\u306e\u30bb\u30af\u30b7\u30e7\u30f3\u3067\u306f\u3001\u3053\u308c\u3089\u306e\u30e1\u30ea\u30c3\u30c8\u3092\u5b9f\u73fe\u3059\u308b\u305f\u3081\u306e\u5177\u4f53\u7684\u306a\u5b9f\u88c5\u30c6\u30af\u30cb\u30c3\u30af\u306b\u3064\u3044\u3066\u89e3\u8aac\u3057\u3066\u3044\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"i-3\">Ruby\u3067HTML\u3092\u64cd\u4f5c\u3059\u308b\u57fa\u672c\u30c6\u30af\u30cb\u30c3\u30af<\/h2>\n\n\n\n<p class=\"wp-block-paragraph\">Ruby\u3067HTML\u3092\u64cd\u4f5c\u3059\u308b\u969b\u306e\u57fa\u672c\u7684\u306a\u30c6\u30af\u30cb\u30c3\u30af\u306b\u3064\u3044\u3066\u3001\u5b9f\u8df5\u7684\u306a\u30b3\u30fc\u30c9\u4f8b\u3092\u4ea4\u3048\u306a\u304c\u3089\u89e3\u8aac\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-4\">Nokogiri gem\u3067HTML\u3092\u30d1\u30fc\u30b9\u3059\u308b\u65b9\u6cd5<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">Nokogiri\u306f\u3001Ruby\u3067HTML\u3084XML\u3092\u64cd\u4f5c\u3059\u308b\u969b\u306e\u6a19\u6e96\u7684\u306a\u30e9\u30a4\u30d6\u30e9\u30ea\u3067\u3059\u3002\u4ee5\u4e0b\u306b\u57fa\u672c\u7684\u306a\u4f7f\u3044\u65b9\u3092\u793a\u3057\u307e\u3059\uff1a<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li>\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3068\u57fa\u672c\u8a2d\u5b9a<\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\"># Gemfile\u306b\u8ffd\u52a0\ngem 'nokogiri'\n\n# \u30d7\u30ed\u30b0\u30e9\u30e0\u3067\u306e\u8aad\u307f\u8fbc\u307f\nrequire 'nokogiri'\nrequire 'open-uri'\n\n# HTML\u306e\u8aad\u307f\u8fbc\u307f\ndoc = Nokogiri::HTML(URI.open('https:\/\/example.com'))<\/pre>\n\n\n\n<ol start=\"2\" class=\"wp-block-list\">\n<li>\u8981\u7d20\u306e\u691c\u7d22\u3068\u53d6\u5f97<\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" 
data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\"># CSS \u30bb\u30ec\u30af\u30bf\u3092\u4f7f\u7528\u3057\u305f\u8981\u7d20\u306e\u53d6\u5f97\ndoc.css('.article-title')  # \u30af\u30e9\u30b9\u3067\u691c\u7d22\ndoc.css('#main-content')   # ID\u3067\u691c\u7d22\ndoc.css('div p')          # \u968e\u5c64\u95a2\u4fc2\u3067\u691c\u7d22\n\n# XPath\u3092\u4f7f\u7528\u3057\u305f\u8981\u7d20\u306e\u53d6\u5f97\ndoc.xpath('\/\/div[@class=\"article\"]')\ndoc.xpath('\/\/h1[contains(text(), \"Ruby\")]')\n\n# \u30c6\u30ad\u30b9\u30c8\u5185\u5bb9\u306e\u53d6\u5f97\ntitle = doc.at_css('h1').text\ndescription = doc.at_css('meta[name=\"description\"]')['content']<\/pre>\n\n\n\n<ol start=\"3\" class=\"wp-block-list\">\n<li>\u8981\u7d20\u306e\u64cd\u4f5c<\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\"># \u65b0\u3057\u3044\u8981\u7d20\u306e\u4f5c\u6210\u3068\u8ffd\u52a0\nnew_div = Nokogiri::HTML::DocumentFragment.parse('&lt;div class=\"new\"&gt;\u65b0\u3057\u3044\u5185\u5bb9&lt;\/div&gt;')\ndoc.at_css('body').add_child(new_div)\n\n# \u5c5e\u6027\u306e\u5909\u66f4\nelement = doc.at_css('.target')\nelement['class'] = 'modified'\nelement['data-value'] = '\u65b0\u3057\u3044\u5024'<\/pre>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-5\">ERB\u30c6\u30f3\u30d7\u30ec\u30fc\u30c8\u3067HTML\u3092\u751f\u6210\u3059\u308b\u30d9\u30b9\u30c8\u30d7\u30e9\u30af\u30c6\u30a3\u30b9<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">ERB\uff08Embedded 
Ruby\uff09\u306f\u3001Ruby\u306e\u30b3\u30fc\u30c9\u3092HTML\u306b\u57cb\u3081\u8fbc\u3093\u3067\u30c6\u30f3\u30d7\u30ec\u30fc\u30c8\u3092\u4f5c\u6210\u3059\u308b\u305f\u3081\u306e\u5f37\u529b\u306a\u30c4\u30fc\u30eb\u3067\u3059\u3002<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li>\u57fa\u672c\u7684\u306a\u4f7f\u3044\u65b9<\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">require 'erb'\n\n# \u30c6\u30f3\u30d7\u30ec\u30fc\u30c8\u306e\u4f5c\u6210\ntemplate = &lt;&lt;-HTML\n&lt;!DOCTYPE html&gt;\n&lt;html&gt;\n  &lt;head&gt;\n    &lt;title&gt;&lt;%= title %&gt;&lt;\/title&gt;\n  &lt;\/head&gt;\n  &lt;body&gt;\n    &lt;h1&gt;&lt;%= heading %&gt;&lt;\/h1&gt;\n    &lt;% items.each do |item| %&gt;\n      &lt;div class=\"item\"&gt;\n        &lt;h2&gt;&lt;%= item.name %&gt;&lt;\/h2&gt;\n        &lt;p&gt;&lt;%= item.description %&gt;&lt;\/p&gt;\n      &lt;\/div&gt;\n    &lt;% end %&gt;\n  &lt;\/body&gt;\n&lt;\/html&gt;\nHTML\n\n# \u30c6\u30f3\u30d7\u30ec\u30fc\u30c8\u306e\u5b9f\u884c\ntitle = \"\u5546\u54c1\u4e00\u89a7\"\nheading = \"\u304a\u3059\u3059\u3081\u5546\u54c1\"\nitems = [\n  OpenStruct.new(name: \"\u5546\u54c1A\", description: \"\u8aac\u660e\u6587A\"),\n  OpenStruct.new(name: \"\u5546\u54c1B\", description: \"\u8aac\u660e\u6587B\")\n]\n\nerb = ERB.new(template)\nresult = erb.result(binding)<\/pre>\n\n\n\n<ol start=\"2\" class=\"wp-block-list\">\n<li>\u30d1\u30fc\u30b7\u30e3\u30eb\u306e\u6d3b\u7528<\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\"># _header.erb\n&lt;header&gt;\n  &lt;h1&gt;&lt;%= site_title %&gt;&lt;\/h1&gt;\n  &lt;nav&gt;\n    &lt;% 
navigation_items.each do |item| %&gt;\n      &lt;a href=\"&lt;%= item[:url] %&gt;\"&gt;&lt;%= item[:text] %&gt;&lt;\/a&gt;\n    &lt;% end %&gt;\n  &lt;\/nav&gt;\n&lt;\/header&gt;\n\n# \u30e1\u30a4\u30f3\u30c6\u30f3\u30d7\u30ec\u30fc\u30c8\ndef render_partial(partial_name, locals = {})\n  path = \"_#{partial_name}.erb\"\n  template = File.read(path)\n  erb = ERB.new(template)\n  erb.result_with_hash(locals)\nend\n\n# \u30d1\u30fc\u30b7\u30e3\u30eb\u306e\u4f7f\u7528\nheader_html = render_partial('header', {\n  site_title: 'My Site',\n  navigation_items: [\n    { url: '\/', text: 'Home' },\n    { url: '\/about', text: 'About' }\n  ]\n})<\/pre>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-6\">CGI\u30e9\u30a4\u30d6\u30e9\u30ea\u3092\u4f7f\u3063\u305fHTML\u64cd\u4f5c\u306e\u57fa\u790e<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">CGI\u30e9\u30a4\u30d6\u30e9\u30ea\u306f\u3001Web\u30d5\u30a9\u30fc\u30e0\u3084HTTP\u30ea\u30af\u30a8\u30b9\u30c8\u306e\u51e6\u7406\u306b\u4fbf\u5229\u306a\u6a5f\u80fd\u3092\u63d0\u4f9b\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li>\u57fa\u672c\u7684\u306a\u30d5\u30a9\u30fc\u30e0\u51e6\u7406<\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">require 'cgi'\ncgi = CGI.new\n\n# \u30d5\u30a9\u30fc\u30e0\u30d1\u30e9\u30e1\u30fc\u30bf\u306e\u53d6\u5f97\nname = cgi['name']\nemail = cgi['email']\n\n# HTML\u306e\u30a8\u30b9\u30b1\u30fc\u30d7\u51e6\u7406\nescaped_text = CGI.escapeHTML('&lt;script&gt;alert(\"XSS\");&lt;\/script&gt;')\n\n# \u30ec\u30b9\u30dd\u30f3\u30b9\u306e\u751f\u6210\nprint cgi.header\nprint &lt;&lt;-HTML\n&lt;!DOCTYPE html&gt;\n&lt;html&gt;\n  &lt;head&gt;\n    &lt;title&gt;\u30d5\u30a9\u30fc\u30e0\u51e6\u7406\u7d50\u679c&lt;\/title&gt;\n  &lt;\/head&gt;\n  &lt;body&gt;\n    
&lt;h1&gt;\u9001\u4fe1\u3055\u308c\u305f\u60c5\u5831&lt;\/h1&gt;\n    &lt;p&gt;\u540d\u524d: #{CGI.escapeHTML(name)}&lt;\/p&gt;\n    &lt;p&gt;\u30e1\u30fc\u30eb: #{CGI.escapeHTML(email)}&lt;\/p&gt;\n  &lt;\/body&gt;\n&lt;\/html&gt;\nHTML<\/pre>\n\n\n\n<ol start=\"2\" class=\"wp-block-list\">\n<li>\u30af\u30c3\u30ad\u30fc\u306e\u51e6\u7406<\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\"># \u30af\u30c3\u30ad\u30fc\u306e\u8a2d\u5b9a\ncookie = CGI::Cookie.new(\n  'name' =&gt; 'user_id',\n  'value' =&gt; '12345',\n  'expires' =&gt; Time.now + 3600\n)\n\n# \u30ec\u30b9\u30dd\u30f3\u30b9\u30d8\u30c3\u30c0\u30fc\u306b\u30af\u30c3\u30ad\u30fc\u3092\u542b\u3081\u308b\nprint cgi.header('cookie' =&gt; [cookie])\n\n# \u30af\u30c3\u30ad\u30fc\u306e\u8aad\u307f\u53d6\u308a\ncookies = cgi.cookies\nuser_id = cookies['user_id'].first if cookies['user_id']<\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u3053\u308c\u3089\u306e\u57fa\u672c\u30c6\u30af\u30cb\u30c3\u30af\u3092\u7d44\u307f\u5408\u308f\u305b\u308b\u3053\u3068\u3067\u3001HTML\u306e\u89e3\u6790\u3001\u751f\u6210\u3001\u30d5\u30a9\u30fc\u30e0\u51e6\u7406\u306a\u3069\u3001\u69d8\u3005\u306aWeb\u30a2\u30d7\u30ea\u30b1\u30fc\u30b7\u30e7\u30f3\u306e\u8981\u4ef6\u306b\u5bfe\u5fdc\u3067\u304d\u307e\u3059\u3002\u6b21\u306e\u30bb\u30af\u30b7\u30e7\u30f3\u3067\u306f\u3001\u3053\u308c\u3089\u306e\u6280\u8853\u3092\u4f7f\u3063\u305f\u5b9f\u8df5\u7684\u306a\u30d7\u30ed\u30b0\u30e9\u30df\u30f3\u30b0\u4f8b\u3092\u7d39\u4ecb\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"i-7\">\u5b9f\u8df5\u7684\u306aHTML\u64cd\u4f5c\u30d7\u30ed\u30b0\u30e9\u30df\u30f3\u30b0<\/h2>\n\n\n\n<p 
class=\"wp-block-paragraph\">\u5b9f\u969b\u306e\u958b\u767a\u73fe\u5834\u3067\u4f7f\u3048\u308b\u3001\u5b9f\u8df5\u7684\u306aHTML\u64cd\u4f5c\u30d7\u30ed\u30b0\u30e9\u30df\u30f3\u30b0\u306b\u3064\u3044\u3066\u89e3\u8aac\u3057\u307e\u3059\u3002\u30a8\u30e9\u30fc\u51e6\u7406\u3084\u30d1\u30d5\u30a9\u30fc\u30de\u30f3\u30b9\u6700\u9069\u5316\u3082\u542b\u3081\u305f\u3001\u672c\u756a\u74b0\u5883\u3067\u4f7f\u7528\u53ef\u80fd\u306a\u30b3\u30fc\u30c9\u4f8b\u3092\u7d39\u4ecb\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-8\">\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u30b9\u30af\u30ea\u30d7\u30c8\u306e\u4f5c\u6210\u624b\u9806<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">\u5927\u898f\u6a21\u306aWeb\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u3092\u5b89\u5168\u304b\u3064\u52b9\u7387\u7684\u306b\u884c\u3046\u305f\u3081\u306e\u30b9\u30af\u30ea\u30d7\u30c8\u3092\u5b9f\u88c5\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">require 'nokogiri'\nrequire 'open-uri'\nrequire 'logger'\nrequire 'csv'\n\nclass WebScraper\n  def initialize\n    @logger = Logger.new('scraping.log')\n    @retry_count = 3\n    @delay = 1 # \u30ea\u30af\u30a8\u30b9\u30c8\u9593\u9694\uff08\u79d2\uff09\n  end\n\n  def scrape_pages(urls)\n    results = []\n    urls.each do |url|\n      begin\n        # \u30ea\u30af\u30a8\u30b9\u30c8\u9593\u9694\u3092\u8a2d\u5b9a\u3057\u3066\u30b5\u30fc\u30d0\u30fc\u306b\u8ca0\u8377\u3092\u304b\u3051\u306a\u3044\n        sleep @delay\n\n        # \u30da\u30fc\u30b8\u306e\u53d6\u5f97\u3092\u8a66\u884c\n        data = fetch_with_retry(url)\n        results &lt;&lt; data if data\n\n      rescue StandardError =&gt; e\n        @logger.error(\"Error scraping #{url}: #{e.message}\")\n        next\n      end\n    end\n    
save_to_csv(results)\n    results\n  end\n\n  private\n\n  def fetch_with_retry(url)\n    tries = 0\n    begin\n      doc = Nokogiri::HTML(URI.open(url))\n      extract_data(doc)\n    rescue OpenURI::HTTPError, SocketError =&gt; e\n      tries += 1\n      if tries &lt; @retry_count\n        @logger.warn(\"Retry #{tries}\/#{@retry_count} for #{url}\")\n        sleep(@delay * tries)\n        retry\n      else\n        raise e\n      end\n    end\n  end\n\n  def extract_data(doc)\n    {\n      title: doc.at_css('h1')&amp;.text&amp;.strip,\n      description: doc.at_css('meta[name=\"description\"]')&amp;.[]('content'),\n      price: doc.at_css('.price')&amp;.text&amp;.gsub(\/[^\\d]\/, ''),\n      categories: doc.css('.category').map(&amp;:text)\n    }\n  end\n\n  def save_to_csv(results)\n    CSV.open('scraped_data.csv', 'wb') do |csv|\n      csv &lt;&lt; results.first.keys\n      results.each { |row| csv &lt;&lt; row.values }\n    end\n  end\nend<\/pre>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-9\">\u52d5\u7684\u306aWeb\u30da\u30fc\u30b8\u306e\u751f\u6210\u30c6\u30af\u30cb\u30c3\u30af<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">\u30e6\u30fc\u30b6\u30fc\u5165\u529b\u306b\u5fdc\u3058\u3066\u52d5\u7684\u306bHTML\u3092\u751f\u6210\u3059\u308b\u5b9f\u88c5\u4f8b\u3067\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">class DynamicPageGenerator\n  def initialize(template_dir)\n    @template_dir = template_dir\n    @cache = {}\n  end\n\n  def generate_page(template_name, data)\n    template = load_template(template_name)\n    layout = load_template('layout')\n\n    # XSS\u5bfe\u7b56\n    sanitized_data = sanitize_data(data)\n\n    # \u30c6\u30f3\u30d7\u30ec\u30fc\u30c8\u306e\u5b9f\u884c\n    content = ERB.new(template).result_with_hash(sanitized_data)\n\n    # 
\u30ec\u30a4\u30a2\u30a6\u30c8\u3078\u306e\u57cb\u3081\u8fbc\u307f\n    ERB.new(layout).result_with_hash(\n      content: content,\n      title: sanitized_data[:title],\n      meta_description: sanitized_data[:description]\n    )\n  end\n\n  private\n\n  def load_template(name)\n    @cache[name] ||= begin\n      path = File.join(@template_dir, \"#{name}.erb\")\n      File.read(path)\n    end\n  end\n\n  def sanitize_data(data)\n    data.transform_values do |value|\n      case value\n      when String\n        CGI.escapeHTML(value)\n      when Array\n        value.map { |v| v.is_a?(String) ? CGI.escapeHTML(v) : v }\n      else\n        value\n      end\n    end\n  end\nend<\/pre>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-10\">\u30d5\u30a9\u30fc\u30e0\u51e6\u7406\u306e\u5b9f\u88c5\u65b9\u6cd5<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">\u30bb\u30ad\u30e5\u30a2\u3067\u4f7f\u3044\u3084\u3059\u3044\u30d5\u30a9\u30fc\u30e0\u51e6\u7406\u306e\u5b9f\u88c5\u4f8b\u3067\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">require 'sinatra'\nrequire 'rack\/csrf'\n\nclass SecureFormHandler &lt; Sinatra::Base\n  use Rack::Session::Cookie, secret: ENV['SESSION_SECRET']\n  use Rack::Csrf, raise: true\n\n  configure do\n    set :views, '.\/views'\n    enable :logging\n  end\n\n  # \u30d5\u30a9\u30fc\u30e0\u306e\u8868\u793a\n  get '\/contact' do\n    erb :contact, locals: {\n      csrf_token: Rack::Csrf.token(env),\n      csrf_tag: Rack::Csrf.tag(env)\n    }\n  end\n\n  # \u30d5\u30a9\u30fc\u30e0\u306e\u51e6\u7406\n  post '\/contact' do\n    begin\n      # \u30d0\u30ea\u30c7\u30fc\u30b7\u30e7\u30f3\n      validate_form_data(params)\n\n      # \u30c7\u30fc\u30bf\u306e\u4fdd\u5b58\n      save_contact_form(params)\n\n      # \u30e1\u30fc\u30eb\u9001\u4fe1\n      
send_notification_email(params)\n\n      # \u6210\u529f\u30da\u30fc\u30b8\u3078\u30ea\u30c0\u30a4\u30ec\u30af\u30c8\n      redirect '\/contact\/thanks'\n\n    rescue ValidationError =&gt; e\n      # \u30a8\u30e9\u30fc\u6642\u306e\u51e6\u7406\n      status 422\n      erb :contact, locals: {\n        errors: e.messages,\n        params: params,\n        csrf_token: Rack::Csrf.token(env),\n        csrf_tag: Rack::Csrf.tag(env)\n      }\n    end\n  end\n\n  private\n\n  def validate_form_data(params)\n    errors = []\n    errors &lt;&lt; \"\u540d\u524d\u306f\u5fc5\u9808\u3067\u3059\" if params[:name].to_s.empty?\n    errors &lt;&lt; \"\u30e1\u30fc\u30eb\u30a2\u30c9\u30ec\u30b9\u306f\u5fc5\u9808\u3067\u3059\" if params[:email].to_s.empty?\n    errors &lt;&lt; \"\u30e1\u30fc\u30eb\u30a2\u30c9\u30ec\u30b9\u306e\u5f62\u5f0f\u304c\u4e0d\u6b63\u3067\u3059\" unless params[:email] =~ \/\\A[\\w+\\-.]+@[a-z\\d\\-.]+\\.[a-z]+\\z\/i\n\n    raise ValidationError.new(errors) unless errors.empty?\n  end\n\n  def save_contact_form(params)\n    # \u30c7\u30fc\u30bf\u30d9\u30fc\u30b9\u3078\u306e\u4fdd\u5b58\u51e6\u7406\n    Contact.create!(\n      name: params[:name],\n      email: params[:email],\n      message: params[:message],\n      ip_address: request.ip,\n      user_agent: request.user_agent\n    )\n  end\n\n  def send_notification_email(params)\n    # \u30e1\u30fc\u30eb\u9001\u4fe1\u51e6\u7406\n    Mailer.contact_notification(\n      to: ENV['ADMIN_EMAIL'],\n      from: params[:email],\n      subject: \"\u65b0\u3057\u3044\u304a\u554f\u3044\u5408\u308f\u305b: #{params[:name]}\",\n      body: params[:message]\n    ).deliver_now\n  end\nend\n\nclass ValidationError &lt; StandardError\n  attr_reader :messages\n\n  def initialize(messages)\n    @messages = messages\n    super(messages.join(\", \"))\n  end\nend<\/pre>\n\n\n\n<p 
class=\"wp-block-paragraph\">\u3053\u308c\u3089\u306e\u5b9f\u88c5\u4f8b\u306f\u3001\u4ee5\u4e0b\u306e\u91cd\u8981\u306a\u70b9\u3092\u8003\u616e\u3057\u3066\u3044\u307e\u3059\uff1a<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li>\u30a8\u30e9\u30fc\u51e6\u7406<\/li>\n<\/ol>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u9069\u5207\u306a\u4f8b\u5916\u51e6\u7406<\/li>\n\n\n\n<li>\u30ea\u30c8\u30e9\u30a4\u6a5f\u69cb<\/li>\n\n\n\n<li>\u30ed\u30b0\u8a18\u9332<\/li>\n<\/ul>\n\n\n\n<ol start=\"2\" class=\"wp-block-list\">\n<li>\u30bb\u30ad\u30e5\u30ea\u30c6\u30a3<\/li>\n<\/ol>\n\n\n\n<ul class=\"wp-block-list\">\n<li>XSS\u5bfe\u7b56<\/li>\n\n\n\n<li>CSRF\u5bfe\u7b56<\/li>\n\n\n\n<li>\u5165\u529b\u30d0\u30ea\u30c7\u30fc\u30b7\u30e7\u30f3<\/li>\n<\/ul>\n\n\n\n<ol start=\"3\" class=\"wp-block-list\">\n<li>\u30d1\u30d5\u30a9\u30fc\u30de\u30f3\u30b9<\/li>\n<\/ol>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u30ad\u30e3\u30c3\u30b7\u30e5\u306e\u6d3b\u7528<\/li>\n\n\n\n<li>\u9069\u5207\u306a\u30ea\u30af\u30a8\u30b9\u30c8\u9593\u9694<\/li>\n\n\n\n<li>\u52b9\u7387\u7684\u306a\u30c7\u30fc\u30bf\u51e6\u7406<\/li>\n<\/ul>\n\n\n\n<ol start=\"4\" class=\"wp-block-list\">\n<li>\u30e1\u30f3\u30c6\u30ca\u30f3\u30b9\u6027<\/li>\n<\/ol>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u30af\u30e9\u30b9\u30d9\u30fc\u30b9\u306e\u8a2d\u8a08<\/li>\n\n\n\n<li>\u8cac\u52d9\u306e\u5206\u96e2<\/li>\n\n\n\n<li>\u8a2d\u5b9a\u306e\u5916\u90e8\u5316<\/li>\n<\/ul>\n\n\n\n<p class=\"wp-block-paragraph\">\u3053\u308c\u3089\u306e\u30b3\u30fc\u30c9\u306f\u5b9f\u52d9\u3067\u306e\u5229\u7528\u3092\u60f3\u5b9a\u3057\u3066\u4f5c\u6210\u3055\u308c\u3066\u3044\u307e\u3059\u304c\u3001\u5b9f\u969b\u306e\u4f7f\u7528\u6642\u306b\u306f\u3001\u30d7\u30ed\u30b8\u30a7\u30af\u30c8\u306e\u8981\u4ef6\u306b\u5fdc\u3058\u3066\u9069\u5207\u306b\u30ab\u30b9\u30bf\u30de\u30a4\u30ba\u3059\u308b\u3053\u3068\u3092\u304a\u52e7\u3081\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\" 
id=\"i-11\">HTML\u64cd\u4f5c\u6642\u306e\u6ce8\u610f\u70b9\u3068\u30c8\u30e9\u30d6\u30eb\u30b7\u30e5\u30fc\u30c6\u30a3\u30f3\u30b0<\/h2>\n\n\n\n<p class=\"wp-block-paragraph\">Ruby\u3067HTML\u3092\u64cd\u4f5c\u3059\u308b\u969b\u306b\u76f4\u9762\u3059\u308b\u53ef\u80fd\u6027\u306e\u3042\u308b\u554f\u984c\u3068\u3001\u305d\u306e\u89e3\u6c7a\u65b9\u6cd5\u306b\u3064\u3044\u3066\u89e3\u8aac\u3057\u307e\u3059\u3002\u5b9f\u969b\u306e\u958b\u767a\u73fe\u5834\u3067\u5f79\u7acb\u3064\u5177\u4f53\u7684\u306a\u30c8\u30e9\u30d6\u30eb\u30b7\u30e5\u30fc\u30c6\u30a3\u30f3\u30b0\u624b\u6cd5\u3092\u7d39\u4ecb\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-12\">\u6587\u5b57\u30a8\u30f3\u30b3\u30fc\u30c7\u30a3\u30f3\u30b0\u306e\u9069\u5207\u306a\u51e6\u7406\u65b9\u6cd5<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">\u6587\u5b57\u30a8\u30f3\u30b3\u30fc\u30c7\u30a3\u30f3\u30b0\u306e\u554f\u984c\u306f\u3001\u7279\u306b\u65e5\u672c\u8a9e\u3092\u6271\u3046\u969b\u306b\u3088\u304f\u767a\u751f\u3057\u307e\u3059\u3002\u4ee5\u4e0b\u306b\u4e3b\u306a\u5bfe\u51e6\u65b9\u6cd5\u3092\u793a\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">class EncodingHandler\n  def self.process_html(html_content)\n    # \u6587\u5b57\u30a8\u30f3\u30b3\u30fc\u30c7\u30a3\u30f3\u30b0\u306e\u81ea\u52d5\u691c\u51fa\u3068\u5909\u63db\n    detected_encoding = detect_encoding(html_content)\n\n    # UTF-8\u3078\u306e\u5909\u63db\n    content_utf8 = if detected_encoding\n      html_content.force_encoding(detected_encoding).encode('UTF-8')\n    else\n      html_content.force_encoding('UTF-8')\n    end\n\n    # \u4e0d\u6b63\u306a\u30d0\u30a4\u30c8\u5217\u306e\u51e6\u7406\n    content_utf8.encode('UTF-8', invalid: :replace, undef: :replace, replace: '?')\n  end\n\n  private\n\n  def 
self.detect_encoding(content)\n    require 'charlock_holmes'\n    detection = CharlockHolmes::EncodingDetector.detect(content)\n    detection[:encoding] if detection\n  end\nend\n\n# \u4f7f\u7528\u4f8b\nbegin\n  html_content = File.read('webpage.html', encoding: 'ASCII-8BIT')\n  processed_content = EncodingHandler.process_html(html_content)\n\n  # Nokogiri\u3067\u30d1\u30fc\u30b9\n  doc = Nokogiri::HTML(processed_content)\nrescue EncodingError =&gt; e\n  logger.error \"\u30a8\u30f3\u30b3\u30fc\u30c7\u30a3\u30f3\u30b0\u30a8\u30e9\u30fc: #{e.message}\"\nend<\/pre>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-13\">\u30e1\u30e2\u30ea\u4f7f\u7528\u91cf\u306e\u6700\u9069\u5316\u30c6\u30af\u30cb\u30c3\u30af<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">\u5927\u91cf\u306eHTML\u30c7\u30fc\u30bf\u3092\u51e6\u7406\u3059\u308b\u969b\u306e\u30e1\u30e2\u30ea\u4f7f\u7528\u91cf\u3092\u6700\u9069\u5316\u3059\u308b\u65b9\u6cd5\u3067\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">class MemoryOptimizedParser\n  def initialize\n    @batch_size = 1000\n    @logger = Logger.new('memory_usage.log')\n  end\n\n  def process_large_html_file(file_path)\n    # \u30e1\u30e2\u30ea\u4f7f\u7528\u91cf\u306e\u30e2\u30cb\u30bf\u30ea\u30f3\u30b0\n    initial_memory = memory_usage\n\n    File.open(file_path) do |file|\n      # \u30b9\u30c8\u30ea\u30fc\u30df\u30f3\u30b0\u51e6\u7406\u3067\u30d5\u30a1\u30a4\u30eb\u3092\u8aad\u307f\u8fbc\u307f\n      Nokogiri::HTML::SAX::Parser.new(DocumentHandler.new).parse_io(file)\n    end\n\n    log_memory_usage(initial_memory)\n  end\n\n  def batch_process_elements(doc)\n    doc.css('target_element').each_slice(@batch_size) do |elements|\n      elements.each do |element|\n        yield element\n      end\n\n      # \u30e1\u30e2\u30ea\u306e\u89e3\u653e\n      
GC.start if memory_critical?\n    end\n  end\n\n  private\n\n  def memory_usage\n    `ps -o rss= -p #{Process.pid}`.to_i\n  end\n\n  def memory_critical?\n    memory_usage &gt; 1_000_000 # 1GB\u8d85\u904e\u3067\u8b66\u544a\n  end\n\n  def log_memory_usage(initial_memory)\n    current_memory = memory_usage\n    @logger.info \"\u30e1\u30e2\u30ea\u4f7f\u7528\u91cf: #{current_memory - initial_memory}KB \u5897\u52a0\"\n  end\nend\n\n# SAX\u30d1\u30fc\u30b5\u30fc\u7528\u306e\u30cf\u30f3\u30c9\u30e9\u30fc\nclass DocumentHandler &lt; Nokogiri::XML::SAX::Document\n  def start_element(name, attributes = [])\n    # \u8981\u7d20\u306e\u958b\u59cb\u30bf\u30b0\u306e\u51e6\u7406\n  end\n\n  def end_element(name)\n    # \u8981\u7d20\u306e\u7d42\u4e86\u30bf\u30b0\u306e\u51e6\u7406\n  end\n\n  def characters(string)\n    # \u30c6\u30ad\u30b9\u30c8\u30ce\u30fc\u30c9\u306e\u51e6\u7406\n  end\nend<\/pre>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-14\">\u30bb\u30ad\u30e5\u30ea\u30c6\u30a3\u5bfe\u7b56\u306e\u5b9f\u88c5\u30dd\u30a4\u30f3\u30c8<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">HTML\u64cd\u4f5c\u6642\u306e\u30bb\u30ad\u30e5\u30ea\u30c6\u30a3\u30ea\u30b9\u30af\u3068\u3001\u305d\u306e\u5bfe\u7b56\u306b\u3064\u3044\u3066\u89e3\u8aac\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">class SecureHTMLProcessor\n  ALLOWED_TAGS = %w(p br b i u h1 h2 h3 ul ol li)\n  ALLOWED_ATTRIBUTES = %w(href title alt)\n\n  def initialize\n    @sanitizer = Rails::HTML::SafeListSanitizer.new\n  end\n\n  def sanitize_html(html_content)\n    # HTML\u306e\u7121\u5bb3\u5316\n    @sanitizer.sanitize(html_content,\n      tags: ALLOWED_TAGS,\n      attributes: ALLOWED_ATTRIBUTES\n    )\n  end\n\n  def process_user_input(input)\n    # XSS\u5bfe\u7b56\n    escaped_input = 
CGI.escapeHTML(input)\n\n    # SQL\u30a4\u30f3\u30b8\u30a7\u30af\u30b7\u30e7\u30f3\u5bfe\u7b56\n    sanitized_input = ActiveRecord::Base.connection.quote(escaped_input)\n\n    # \u30d1\u30b9\u30c8\u30e9\u30d0\u30fc\u30b5\u30eb\u5bfe\u7b56\n    safe_path = File.basename(input)\n\n    {\n      escaped: escaped_input,\n      sanitized_sql: sanitized_input,\n      safe_path: safe_path\n    }\n  end\n\n  def validate_url(url)\n    uri = URI.parse(url)\n    return false unless %w(http https).include?(uri.scheme)\n\n    # \u30db\u30b9\u30c8\u306e\u691c\u8a3c\n    allowed_hosts = ['example.com', 'api.example.com']\n    return false unless allowed_hosts.include?(uri.host)\n\n    true\n  rescue URI::InvalidURIError\n    false\n  end\n\n  def secure_file_write(content, path)\n    # \u5b89\u5168\u306a\u30c7\u30a3\u30ec\u30af\u30c8\u30ea\u306e\u78ba\u8a8d\n    raise \"\u4e0d\u6b63\u306a\u30d1\u30b9\" unless safe_directory?(path)\n\n    # \u4e00\u6642\u30d5\u30a1\u30a4\u30eb\u3092\u4f7f\u7528\u3057\u305f\u5b89\u5168\u306a\u66f8\u304d\u8fbc\u307f\n    temp_path = \"#{path}.tmp\"\n    File.write(temp_path, content)\n    File.rename(temp_path, path)\n  rescue =&gt; e\n    File.unlink(temp_path) if File.exist?(temp_path)\n    raise e\n  end\n\n  private\n\n  def safe_directory?(path)\n    allowed_dirs = ['\/var\/www\/html\/', '\/tmp\/safe\/']\n    allowed_dirs.any? 
{ |dir| path.start_with?(dir) }\n  end\nend<\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u4e3b\u306a\u6ce8\u610f\u70b9\u3068\u305d\u306e\u5bfe\u7b56\u3092\u307e\u3068\u3081\u308b\u3068\uff1a<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li>\u6587\u5b57\u30a8\u30f3\u30b3\u30fc\u30c7\u30a3\u30f3\u30b0<\/li>\n<\/ol>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u5165\u529b\u30c7\u30fc\u30bf\u306e\u30a8\u30f3\u30b3\u30fc\u30c7\u30a3\u30f3\u30b0\u691c\u51fa<\/li>\n\n\n\n<li>UTF-8\u3078\u306e\u9069\u5207\u306a\u5909\u63db<\/li>\n\n\n\n<li>\u4e0d\u6b63\u306a\u30d0\u30a4\u30c8\u5217\u306e\u51e6\u7406<\/li>\n<\/ul>\n\n\n\n<ol start=\"2\" class=\"wp-block-list\">\n<li>\u30e1\u30e2\u30ea\u7ba1\u7406<\/li>\n<\/ol>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u30d0\u30c3\u30c1\u51e6\u7406\u306e\u6d3b\u7528<\/li>\n\n\n\n<li>\u30b9\u30c8\u30ea\u30fc\u30df\u30f3\u30b0\u51e6\u7406\u306e\u5229\u7528<\/li>\n\n\n\n<li>\u5b9a\u671f\u7684\u306aGC\u306e\u5b9f\u884c<\/li>\n\n\n\n<li>\u30e1\u30e2\u30ea\u4f7f\u7528\u91cf\u306e\u30e2\u30cb\u30bf\u30ea\u30f3\u30b0<\/li>\n<\/ul>\n\n\n\n<ol start=\"3\" class=\"wp-block-list\">\n<li>\u30bb\u30ad\u30e5\u30ea\u30c6\u30a3<\/li>\n<\/ol>\n\n\n\n<ul class=\"wp-block-list\">\n<li>HTML\u30b5\u30cb\u30bf\u30a4\u30ba<\/li>\n\n\n\n<li>XSS\u5bfe\u7b56<\/li>\n\n\n\n<li>SQL\u30a4\u30f3\u30b8\u30a7\u30af\u30b7\u30e7\u30f3\u5bfe\u7b56<\/li>\n\n\n\n<li>\u30d1\u30b9\u30c8\u30e9\u30d0\u30fc\u30b5\u30eb\u5bfe\u7b56<\/li>\n\n\n\n<li>URL\u691c\u8a3c<\/li>\n<\/ul>\n\n\n\n<p 
class=\"wp-block-paragraph\">\u3053\u308c\u3089\u306e\u5bfe\u7b56\u3092\u9069\u5207\u306b\u5b9f\u88c5\u3059\u308b\u3053\u3068\u3067\u3001\u5b89\u5168\u3067\u52b9\u7387\u7684\u306aHTML\u64cd\u4f5c\u3092\u5b9f\u73fe\u3067\u304d\u307e\u3059\u3002\u305f\u3060\u3057\u3001\u30bb\u30ad\u30e5\u30ea\u30c6\u30a3\u5bfe\u7b56\u306f\u5e38\u306b\u6700\u65b0\u306e\u8105\u5a01\u306b\u5bfe\u5fdc\u3059\u308b\u5fc5\u8981\u304c\u3042\u308b\u305f\u3081\u3001\u5b9a\u671f\u7684\u306a\u898b\u76f4\u3057\u3068\u66f4\u65b0\u3092\u884c\u3046\u3053\u3068\u3092\u304a\u52e7\u3081\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"i-15\">\u5b9f\u52d9\u3067\u4f7f\u3048\u308b\u30b3\u30fc\u30c9\u4f8b\u3068\u30b5\u30f3\u30d7\u30eb<\/h2>\n\n\n\n<p class=\"wp-block-paragraph\">\u3053\u3053\u3067\u306f\u3001\u5b9f\u52d9\u3067\u305d\u306e\u307e\u307e\u4f7f\u7528\u3067\u304d\u308b\u5177\u4f53\u7684\u306a\u30b3\u30fc\u30c9\u4f8b\u3092\u63d0\u4f9b\u3057\u307e\u3059\u3002\u5404\u5b9f\u88c5\u306b\u306f\u8a73\u7d30\u306a\u30b3\u30e1\u30f3\u30c8\u3068\u4f7f\u7528\u65b9\u6cd5\u306e\u8aac\u660e\u3092\u4ed8\u8a18\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-16\">HTML\u30d1\u30fc\u30b5\u30fc\u306e\u5b9f\u88c5\u4f8b<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">\u8907\u6570\u306eWeb\u30da\u30fc\u30b8\u304b\u3089\u5fc5\u8981\u306a\u60c5\u5831\u3092\u62bd\u51fa\u3057\u3001\u69cb\u9020\u5316\u3055\u308c\u305f\u30c7\u30fc\u30bf\u3068\u3057\u3066\u4fdd\u5b58\u3059\u308b\u5b9f\u7528\u7684\u306aHTML\u30d1\u30fc\u30b5\u30fc\u3067\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">require 'nokogiri'\nrequire 'open-uri'\nrequire 'json'\nrequire 'logger'\n\nclass HTMLParser\n  class ParserError &lt; StandardError; end\n\n  def initialize(config = {})\n    
@config = {\n      cache_enabled: true,\n      cache_duration: 3600,  # 1\u6642\u9593\n      retry_count: 3,\n      retry_delay: 1,\n      user_agent: 'Mozilla\/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit\/537.36',\n      timeout: 30\n    }.merge(config)\n\n    @logger = Logger.new('parser.log')\n    @cache = {}\n  end\n\n  def parse(url, selectors)\n    content = fetch_with_cache(url)\n    doc = Nokogiri::HTML(content)\n\n    result = {}\n    selectors.each do |key, selector|\n      result[key] = extract_data(doc, selector)\n    end\n\n    result\n  rescue =&gt; e\n    @logger.error \"Parsing error for #{url}: #{e.message}\"\n    raise ParserError, \"Failed to parse #{url}: #{e.message}\"\n  end\n\n  private\n\n  def fetch_with_cache(url)\n    return @cache[url][:content] if cache_valid?(url)\n\n    content = fetch_with_retry(url)\n    cache_store(url, content) if @config[:cache_enabled]\n    content\n  end\n\n  def cache_valid?(url)\n    return false unless @config[:cache_enabled]\n    return false unless @cache[url]\n\n    cache_time = @cache[url][:timestamp]\n    Time.now - cache_time &lt; @config[:cache_duration]\n  end\n\n  def cache_store(url, content)\n    @cache[url] = {\n      content: content,\n      timestamp: Time.now\n    }\n  end\n\n  def fetch_with_retry(url)\n    retries = 0\n    begin\n      URI.open(\n        url,\n        'User-Agent' =&gt; @config[:user_agent],\n        read_timeout: @config[:timeout]\n      ).read\n    rescue OpenURI::HTTPError, SocketError =&gt; e\n      retries += 1\n      if retries &lt; @config[:retry_count]\n        sleep(@config[:retry_delay] * retries)\n        retry\n      else\n        raise e\n      end\n    end\n  end\n\n  def extract_data(doc, selector)\n    case selector\n    when String\n      doc.css(selector).text.strip\n    when Hash\n      if selector[:type] == 'attribute'\n        doc.css(selector[:selector])[selector[:attribute]]\n      elsif selector[:type] == 'array'\n        
doc.css(selector[:selector]).map(&amp;:text).map(&amp;:strip)\n      end\n    end\n  end\nend\n\n# \u4f7f\u7528\u4f8b\nparser = HTMLParser.new(\n  cache_enabled: true,\n  cache_duration: 1800  # 30\u5206\n)\n\nselectors = {\n  title: 'h1.article-title',\n  description: 'meta[name=\"description\"]',\n  tags: {\n    type: 'array',\n    selector: '.tag'\n  },\n  image_url: {\n    type: 'attribute',\n    selector: 'meta[property=\"og:image\"]',\n    attribute: 'content'\n  }\n}\n\nbegin\n  result = parser.parse('https:\/\/example.com\/article', selectors)\n  puts JSON.pretty_generate(result)\nrescue HTMLParser::ParserError =&gt; e\n  puts \"\u30a8\u30e9\u30fc\u304c\u767a\u751f\u3057\u307e\u3057\u305f: #{e.message}\"\nend<\/pre>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-17\">\u30c6\u30f3\u30d7\u30ec\u30fc\u30c8\u30a8\u30f3\u30b8\u30f3\u306e\u6d3b\u7528\u4f8b<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">\u518d\u5229\u7528\u53ef\u80fd\u306a\u30b3\u30f3\u30dd\u30fc\u30cd\u30f3\u30c8\u3092\u6301\u3064\u3001\u5b9f\u7528\u7684\u306a\u30c6\u30f3\u30d7\u30ec\u30fc\u30c8\u30a8\u30f3\u30b8\u30f3\u306e\u5b9f\u88c5\u4f8b\u3067\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">require 'erb'\nrequire 'ostruct'\n\nclass TemplateEngine\n  class RenderError &lt; StandardError; end\n\n  def initialize(template_dir)\n    @template_dir = template_dir\n    @components = {}\n    @helpers = Module.new\n    load_components\n  end\n\n  def render(template_name, locals = {})\n    template = load_template(template_name)\n    context = create_context(locals)\n\n    ERB.new(template).result(context.instance_eval { binding })\n  rescue =&gt; e\n    raise RenderError, \"Template rendering failed: #{e.message}\"\n  end\n\n  def register_helper(name, &amp;block)\n    
@helpers.define_method(name, &amp;block)\n  end\n\n  def component(name, locals = {})\n    raise RenderError, \"Component not found: #{name}\" unless @components[name]\n    render(@components[name], locals)\n  end\n\n  private\n\n  def load_template(name)\n    path = File.join(@template_dir, \"#{name}.erb\")\n    File.read(path)\n  rescue Errno::ENOENT\n    raise RenderError, \"Template not found: #{name}\"\n  end\n\n  def load_components\n    component_dir = File.join(@template_dir, 'components')\n    return unless Dir.exist?(component_dir)\n\n    Dir.glob(File.join(component_dir, '*.erb')).each do |file|\n      name = File.basename(file, '.erb')\n      @components[name] = \"components\/#{name}\"\n    end\n  end\n\n  def create_context(locals)\n    context = OpenStruct.new(locals)\n    context.extend(@helpers)\n    context.define_singleton_method(:component) { |name, **opts| component(name, opts) }\n    context\n  end\nend\n\n# \u4f7f\u7528\u4f8b\n\n# \u30d8\u30eb\u30d1\u30fc\u30e1\u30bd\u30c3\u30c9\u306e\u5b9a\u7fa9\nengine = TemplateEngine.new('templates')\n\nengine.register_helper(:format_date) do |date|\n  date.strftime('%Y\u5e74%m\u6708%d\u65e5')\nend\n\nengine.register_helper(:sanitize) do |text|\n  CGI.escapeHTML(text)\nend\n\n# \u30c6\u30f3\u30d7\u30ec\u30fc\u30c8\u306e\u4f8b\uff08templates\/article.erb\uff09\n=begin\n&lt;!DOCTYPE html&gt;\n&lt;html&gt;\n  &lt;head&gt;\n    &lt;title&gt;&lt;%= title %&gt;&lt;\/title&gt;\n  &lt;\/head&gt;\n  &lt;body&gt;\n    &lt;%= component 'header', title: title %&gt;\n\n    &lt;article&gt;\n      &lt;h1&gt;&lt;%= sanitize(title) %&gt;&lt;\/h1&gt;\n      &lt;time&gt;&lt;%= format_date(published_at) %&gt;&lt;\/time&gt;\n      &lt;%= content %&gt;\n    &lt;\/article&gt;\n\n    &lt;%= component 'footer' %&gt;\n  &lt;\/body&gt;\n&lt;\/html&gt;\n=end\n\n# \u30b3\u30f3\u30dd\u30fc\u30cd\u30f3\u30c8\u306e\u4f8b\uff08templates\/components\/header.erb\uff09\n=begin\n&lt;header&gt;\n  &lt;h1&gt;&lt;%= title %&gt;&lt;\/h1&gt;\n  
&lt;nav&gt;\n    &lt;a href=\"\/\"&gt;Home&lt;\/a&gt;\n    &lt;a href=\"\/about\"&gt;About&lt;\/a&gt;\n  &lt;\/nav&gt;\n&lt;\/header&gt;\n=end\n\n# \u4f7f\u7528\u4f8b\nbegin\n  html = engine.render('article', {\n    title: '\u8a18\u4e8b\u30bf\u30a4\u30c8\u30eb',\n    content: '\u8a18\u4e8b\u306e\u5185\u5bb9...',\n    published_at: Time.now\n  })\n  puts html\nrescue TemplateEngine::RenderError =&gt; e\n  puts \"\u30ec\u30f3\u30c0\u30ea\u30f3\u30b0\u30a8\u30e9\u30fc: #{e.message}\"\nend<\/pre>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-18\">\u5b9f\u7528\u7684\u306a\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u30b3\u30fc\u30c9<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">\u8907\u6570\u30da\u30fc\u30b8\u306e\u5de1\u56de\u3084\u4e26\u5217\u51e6\u7406\u306b\u5bfe\u5fdc\u3057\u305f\u3001\u5b9f\u7528\u7684\u306a\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u5b9f\u88c5\u3067\u3059\u3002<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">require 'nokogiri'\nrequire 'open-uri'\nrequire 'concurrent'\nrequire 'csv'\nrequire 'logger'\n\nclass WebCrawler\n  class CrawlError &lt; StandardError; end\n\n  def initialize(config = {})\n    @config = {\n      max_threads: 5,\n      max_depth: 3,\n      delay: 1,\n      max_pages: 1000,\n      output_file: 'crawl_results.csv',\n      allowed_domains: []\n    }.merge(config)\n\n    @visited = Concurrent::Set.new\n    @queue = Queue.new\n    @results = Concurrent::Array.new\n    @logger = Logger.new('crawler.log')\n  end\n\n  def crawl(start_url, selectors)\n    @start_time = Time.now\n    @queue.push([start_url, 0])\n\n    threads = @config[:max_threads].times.map do\n      Thread.new do\n        while !@queue.empty? 
&amp;&amp; @visited.size &lt; @config[:max_pages]\n          process_url(@queue.pop, selectors)\n        end\n      end\n    end\n\n    threads.each(&amp;:join)\n    save_results\n\n    log_summary\n  rescue =&gt; e\n    @logger.error \"Crawl error: #{e.message}\"\n    raise CrawlError, \"Crawling failed: #{e.message}\"\n  end\n\n  private\n\n  def process_url((url, depth), selectors)\n    return if depth &gt;= @config[:max_depth]\n    return if @visited.include?(url)\n    return unless allowed_domain?(url)\n\n    @visited.add(url)\n    sleep(@config[:delay])\n\n    begin\n      doc = Nokogiri::HTML(URI.open(url))\n      data = extract_data(doc, selectors)\n      @results &lt;&lt; data.merge(url: url)\n\n      # \u6b21\u306eURL\u3092\u62bd\u51fa\n      next_urls = doc.css('a').map { |link| link['href'] }\n      next_urls.each do |next_url|\n        next unless next_url\n        absolute_url = URI.join(url, next_url).to_s\n        @queue.push([absolute_url, depth + 1])\n      end\n    rescue =&gt; e\n      @logger.warn \"Failed to process #{url}: #{e.message}\"\n    end\n  end\n\n  def extract_data(doc, selectors)\n    result = {}\n    selectors.each do |key, selector|\n      result[key] = case selector\n      when String\n        doc.css(selector).text.strip\n      when Hash\n        if selector[:type] == 'array'\n          doc.css(selector[:selector]).map(&amp;:text).map(&amp;:strip)\n        else\n          doc.css(selector[:selector]).first&amp;.[](selector[:attribute])\n        end\n      end\n    end\n    result\n  end\n\n  def allowed_domain?(url)\n    return true if @config[:allowed_domains].empty?\n\n    uri = URI.parse(url)\n    @config[:allowed_domains].any? 
{ |domain| uri.host.end_with?(domain) }\n  end\n\n  def save_results\n    CSV.open(@config[:output_file], 'wb') do |csv|\n      csv &lt;&lt; @results.first.keys\n      @results.each { |result| csv &lt;&lt; result.values }\n    end\n  end\n\n  def log_summary\n    duration = Time.now - @start_time\n    @logger.info \"Crawl completed:\"\n    @logger.info \"Pages processed: #{@visited.size}\"\n    @logger.info \"Data collected: #{@results.size} items\"\n    @logger.info \"Duration: #{duration.round(2)} seconds\"\n  end\nend\n\n# \u4f7f\u7528\u4f8b\ncrawler = WebCrawler.new(\n  max_threads: 3,\n  max_depth: 2,\n  delay: 1.5,\n  allowed_domains: ['example.com'],\n  output_file: 'products.csv'\n)\n\nselectors = {\n  title: 'h1.product-title',\n  price: '.price',\n  description: 'meta[name=\"description\"]',\n  images: {\n    type: 'array',\n    selector: '.product-images img'\n  }\n}\n\nbegin\n  crawler.crawl('https:\/\/example.com\/products', selectors)\nrescue WebCrawler::CrawlError =&gt; e\n  puts \"\u30af\u30ed\u30fc\u30eb\u30a8\u30e9\u30fc: #{e.message}\"\nend<\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u3053\u308c\u3089\u306e\u30b3\u30fc\u30c9\u4f8b\u306f\u4ee5\u4e0b\u306e\u7279\u5fb4\u3092\u6301\u3063\u3066\u3044\u307e\u3059\uff1a<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li>\u30a8\u30e9\u30fc\u30cf\u30f3\u30c9\u30ea\u30f3\u30b0<\/li>\n<\/ol>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u9069\u5207\u306a\u4f8b\u5916\u51e6\u7406<\/li>\n\n\n\n<li>\u30ea\u30c8\u30e9\u30a4\u30e1\u30ab\u30cb\u30ba\u30e0<\/li>\n\n\n\n<li>\u30ed\u30b0\u8a18\u9332<\/li>\n<\/ul>\n\n\n\n<ol start=\"2\" class=\"wp-block-list\">\n<li>\u30d1\u30d5\u30a9\u30fc\u30de\u30f3\u30b9\u6700\u9069\u5316<\/li>\n<\/ol>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u30ad\u30e3\u30c3\u30b7\u30e5\u6a5f\u69cb<\/li>\n\n\n\n<li>\u4e26\u5217\u51e6\u7406<\/li>\n\n\n\n<li>\u30e1\u30e2\u30ea\u4f7f\u7528\u91cf\u306e\u5236\u5fa1<\/li>\n<\/ul>\n\n\n\n<ol start=\"3\" class=\"wp-block-list\">\n<li>\u62e1\u5f35\u6027<\/li>\n<\/ol>\n\n\n\n<ul 
class=\"wp-block-list\">\n<li>\u8a2d\u5b9a\u306e\u30ab\u30b9\u30bf\u30de\u30a4\u30ba<\/li>\n\n\n\n<li>\u30e2\u30b8\u30e5\u30fc\u30eb\u5316\u3055\u308c\u305f\u8a2d\u8a08<\/li>\n\n\n\n<li>\u518d\u5229\u7528\u53ef\u80fd\u306a\u30b3\u30f3\u30dd\u30fc\u30cd\u30f3\u30c8<\/li>\n<\/ul>\n\n\n\n<ol start=\"4\" class=\"wp-block-list\">\n<li>\u5b9f\u7528\u7684\u306a\u6a5f\u80fd<\/li>\n<\/ol>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u9032\u6357\u306e\u30ed\u30b0\u8a18\u9332<\/li>\n\n\n\n<li>\u7d50\u679c\u306e\u4fdd\u5b58<\/li>\n\n\n\n<li>\u8c4a\u5bcc\u306a\u30aa\u30d7\u30b7\u30e7\u30f3<\/li>\n<\/ul>\n\n\n\n<p class=\"wp-block-paragraph\">\u3053\u308c\u3089\u306e\u30b3\u30fc\u30c9\u306f\u5b9f\u969b\u306e\u958b\u767a\u73fe\u5834\u3067\u306e\u8981\u4ef6\u3092\u60f3\u5b9a\u3057\u3066\u4f5c\u6210\u3055\u308c\u3066\u3044\u307e\u3059\u304c\u3001\u4f7f\u7528\u6642\u306b\u306f\u5fc5\u8981\u306b\u5fdc\u3058\u3066\u9069\u5207\u306b\u30ab\u30b9\u30bf\u30de\u30a4\u30ba\u3059\u308b\u3053\u3068\u3092\u304a\u52e7\u3081\u3057\u307e\u3059\u3002\u307e\u305f\u3001\u30bb\u30ad\u30e5\u30ea\u30c6\u30a3\u9762\u3067\u306e\u8003\u616e\u4e8b\u9805\u3082\u5fc5\u305a\u78ba\u8a8d\u3057\u3066\u304f\u3060\u3055\u3044\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>Warning: Undefined array key &#8220;is_admin&#8221; in \/home\/xs392991\/dexall.co.jp\/public_html\/articles\/wp-content\/themes\/ &#8230; 
<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"closed","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[3],"tags":[],"class_list":["post-1295","post","type-post","status-publish","format-standard","category-ruby","nothumb"],"_links":{"self":[{"href":"https:\/\/dexall.co.jp\/articles\/index.php?rest_route=\/wp\/v2\/posts\/1295","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/dexall.co.jp\/articles\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/dexall.co.jp\/articles\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/dexall.co.jp\/articles\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/dexall.co.jp\/articles\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=1295"}],"version-history":[{"count":1,"href":"https:\/\/dexall.co.jp\/articles\/index.php?rest_route=\/wp\/v2\/posts\/1295\/revisions"}],"predecessor-version":[{"id":1296,"href":"https:\/\/dexall.co.jp\/articles\/index.php?rest_route=\/wp\/v2\/posts\/1295\/revisions\/1296"}],"wp:attachment":[{"href":"https:\/\/dexall.co.jp\/articles\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=1295"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/dexall.co.jp\/articles\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=1295"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/dexall.co.jp\/articles\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=1295"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}