{"id":1227,"date":"2025-03-24T08:52:33","date_gmt":"2025-03-23T23:52:33","guid":{"rendered":"https:\/\/dexall.co.jp\/articles\/?p=1227"},"modified":"2025-03-24T08:52:33","modified_gmt":"2025-03-23T23:52:33","slug":"%e3%80%90%e4%bf%9d%e5%ad%98%e7%89%88%e3%80%91nokogiri%e3%81%a7web%e3%82%b9%e3%82%af%e3%83%ac%e3%82%a4%e3%83%94%e3%83%b3%e3%82%b0%e3%82%92%e5%ae%8c%e5%85%a8%e6%94%bb%e7%95%a5%ef%bc%81%e5%ae%9f%e8%b7%b5","status":"publish","type":"post","link":"https:\/\/dexall.co.jp\/articles\/?p=1227","title":{"rendered":"\u3010\u4fdd\u5b58\u7248\u3011Nokogiri\u3067Web\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u3092\u5b8c\u5168\u653b\u7565\uff01\u5b9f\u8df5\u7684\u306a7\u3064\u306e\u30c6\u30af\u30cb\u30c3\u30af"},"content":{"rendered":"\n<div class=\"toc\">\n    <div id=\"toc_container\" class=\"sgb-toc--bullets js-smooth-scroll\" data-dialog-title=\"\u76ee\u6b21\">\n      <p class=\"toc_title\">\u76ee\u6b21 <\/p>\n      <ul class=\"toc_list\">  <li class=\"first\">    <a href=\"#i-0\">Nokogiri\u3068\u306f\uff1fWeb\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u306e\u5f37\u529b\u306a\u5473\u65b9<\/a>    <ul class=\"menu_level_1\">      <li class=\"first\">        <a 
href=\"#i-1\">Ruby\u306eWeb\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u30e9\u30a4\u30d6\u30e9\u30ea\u306e\u5b9a\u756a\u3068\u3057\u3066\u9078\u3070\u308c\u308b\u7406\u7531<\/a>      <\/li>      <li class=\"last\">        <a href=\"#i-2\">Nokogiri\u304c\u30b5\u30dd\u30fc\u30c8\u3059\u308b\u30d1\u30fc\u30b5\u30fc\u3068\u305d\u306e\u7279\u5fb4<\/a>      <\/li>    <\/ul>  <\/li>  <li>    <a href=\"#i-3\">\u74b0\u5883\u69cb\u7bc9\u304b\u3089\u59cb\u3081\u308bNokogiri\u5165\u9580<\/a>    <ul class=\"menu_level_1\">      <li class=\"first\">        <a href=\"#i-4\">gem install\u304b\u3089bundler\u3067\u306e\u7ba1\u7406\u307e\u3067<\/a>      <\/li>      <li class=\"last\">        <a href=\"#i-5\">\u3088\u304f\u3042\u308b\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u30a8\u30e9\u30fc\u3068\u305d\u306e\u89e3\u6c7a\u65b9\u6cd5<\/a>      <\/li>    <\/ul>  <\/li>  <li>    <a href=\"#i-6\">Nokogiri \u306e\u57fa\u672c\u64cd\u4f5c\u30de\u30b9\u30bf\u30fc<\/a>    <ul class=\"menu_level_1\">      <li class=\"first\">        <a href=\"#i-7\">HTML \u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u306e\u8aad\u307f\u8fbc\u307f\u3068\u89e3\u6790<\/a>      <\/li>      <li>        <a href=\"#i-8\">CSS \u30bb\u30ec\u30af\u30bf\u3092\u4f7f\u7528\u3057\u305f\u8981\u7d20\u306e\u53d6\u5f97\u30c6\u30af\u30cb\u30c3\u30af<\/a>      <\/li>      <li class=\"last\">        <a href=\"#i-9\">XPath \u5f0f\u3092\u6d3b\u7528\u3057\u305f\u9ad8\u5ea6\u306a\u8981\u7d20\u6307\u5b9a<\/a>      <\/li>    <\/ul>  <\/li>  <li>    <a href=\"#i-10\">\u5b9f\u8df5\u7684\u306a\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u5b9f\u52d9\u30c6\u30af\u30cb\u30c3\u30af<\/a>    <ul class=\"menu_level_1\">      <li class=\"first\">        <a href=\"#i-11\">\u8907\u6570\u30da\u30fc\u30b8\u306e\u52b9\u7387\u7684\u306a\u30af\u30ed\u30fc\u30ea\u30f3\u30b0\u65b9\u6cd5<\/a>      <\/li>      <li>        <a href=\"#i-12\">\u52d5\u7684\u30b3\u30f3\u30c6\u30f3\u30c4\u3078\u306e\u5bfe\u5fdc\u7b56<\/a>      <\/li>      <li class=\"last\">        <a 
href=\"#i-13\">\u30d0\u30ea\u30c7\u30fc\u30b7\u30e7\u30f3\u3068\u30a8\u30e9\u30fc\u30cf\u30f3\u30c9\u30ea\u30f3\u30b0<\/a>      <\/li>    <\/ul>  <\/li>  <li>    <a href=\"#i-14\">\u30d1\u30d5\u30a9\u30fc\u30de\u30f3\u30b9\u3068\u30b9\u30b1\u30fc\u30e9\u30d3\u30ea\u30c6\u30a3\u306e\u6539\u5584<\/a>    <ul class=\"menu_level_1\">      <li class=\"first\">        <a href=\"#i-15\">\u30e1\u30e2\u30ea\u4f7f\u7528\u91cf\u306e\u6700\u9069\u5316\u30c6\u30af\u30cb\u30c3\u30af<\/a>      <\/li>      <li class=\"last\">        <a href=\"#i-16\">\u30e1\u30e2\u30ea\u51e6\u7406\u306b\u3088\u308b\u9ad8\u901f\u5316\u306e\u5b9f\u73fe<\/a>      <\/li>    <\/ul>  <\/li>  <li>    <a href=\"#i-17\">\u30bb\u30ad\u30e5\u30ea\u30c6\u30a3\u3068\u30de\u30ca\u30fc\u306e\u9069\u5207\u4e8b\u9805<\/a>    <ul class=\"menu_level_1\">      <li class=\"first\">        <a href=\"#i-18\">\u6b63\u3057\u3044\u30ea\u30af\u30a8\u30b9\u30c8\u5bfe\u7b56\u306e\u8a2d\u5b9a<\/a>      <\/li>      <li class=\"last\">        <a href=\"#i-19\">robots.txt\u306e\u5c0a\u91cd\u3068\u30b5\u30a4\u30c8\u30dd\u30ea\u30b7\u30fc\u306e\u78ba\u8a8d<\/a>      <\/li>    <\/ul>  <\/li>  <li>    <a href=\"#i-20\">\u30c8\u30e9\u30d6\u30eb\u30b7\u30e5\u30fc\u30c6\u30a3\u30f3\u30b0\u30ac\u30a4\u30c9<\/a>    <ul class=\"menu_level_1\">      <li class=\"first\">        <a href=\"#i-21\">\u30a8\u30f3\u30b3\u30fc\u30c9\u95a2\u9023\u306e\u554f\u984c\u89e3\u6c7a<\/a>      <\/li>      <li class=\"last\">        <a href=\"#i-22\">\u30d1\u30fc\u30b9\u5931\u6557\u6642\u306e\u5bfe\u51e6\u6cd5<\/a>      <\/li>    <\/ul>  <\/li>  <li class=\"last\">    <a href=\"#i-23\">\u5b9f\u8df5\u7684\u306a\u30e6\u30fc\u30b9\u30b1\u30fc\u30b9\u96c6<\/a>    <ul class=\"menu_level_1\">      <li class=\"first\">        <a href=\"#i-24\">\u30cb\u30e5\u30fc\u30b9\u30b5\u30a4\u30c8\u306e\u8a18\u4e8b\u60c5\u5831\u53d6\u5f97<\/a>      <\/li>      <li>        <a 
href=\"#i-25\">E\u30b3\u30de\u30fc\u30b9\u30b5\u30a4\u30c8\u306e\u5546\u54c1\u30c7\u30fc\u30bf\u53ce\u96c6<\/a>      <\/li>      <li class=\"last\">        <a href=\"#i-26\">SNS\u30d7\u30ed\u30d5\u30a3\u30fc\u30eb\u60c5\u5831\u306e\u62bd\u51fa<\/a>      <\/li>    <\/ul>  <\/li><\/ul>\n      <a href=\"#\" class=\"sgb-toc-button js-toc-button\" rel=\"nofollow\" data-open-dialog=\"true\"><i class=\"fa fa-list\"><\/i><span class=\"sgb-toc-button__text\">\u76ee\u6b21\u3078<\/span><\/a>\n    <\/div><\/div><h2 class=\"wp-block-heading\" id=\"i-0\">Nokogiri\u3068\u306f\uff1fWeb\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u306e\u5f37\u529b\u306a\u5473\u65b9<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-1\">Ruby\u306eWeb\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u30e9\u30a4\u30d6\u30e9\u30ea\u306e\u5b9a\u756a\u3068\u3057\u3066\u9078\u3070\u308c\u308b\u7406\u7531<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">Nokogiri\u306f\u3001HTML\u3084XML\u3092\u89e3\u6790\u3059\u308b\u305f\u3081\u306eRuby\u30e9\u30a4\u30d6\u30e9\u30ea\u3067\u3042\u308a\u3001\u305d\u306e\u540d\u524d\u306f\u65e5\u672c\u8a9e\u306e\u300c\u92f8\uff08\u306e\u3053\u304e\u308a\uff09\u300d\u306b\u7531\u6765\u3057\u307e\u3059\u3002\u6728\u3092\u5207\u308b\u3088\u3046\u306b\u3001HTML\u3084XML\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u3092\u81ea\u5728\u306b\u89e3\u6790\u3067\u304d\u308b\u3068\u3044\u3046\u610f\u5473\u304c\u8fbc\u3081\u3089\u308c\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">Nokogiri\u304c\u591a\u304f\u306eRuby\u30a8\u30f3\u30b8\u30cb\u30a2\u304b\u3089\u9078\u3070\u308c\u308b\u7406\u7531\u306f\u3001\u4ee5\u4e0b\u306e\u7279\u5fb4\u306b\u3042\u308a\u307e\u3059\uff1a<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>\u9ad8\u901f\u306a\u89e3\u6790\u51e6\u7406<\/strong><\/li>\n<\/ol>\n\n\n\n<ul 
class=\"wp-block-list\">\n<li>\u30cd\u30a4\u30c6\u30a3\u30d6\u306eC\u8a00\u8a9e\u5b9f\u88c5\u306b\u3088\u308a\u3001\u5927\u91cf\u306eHTML\u3084XML\u3092\u9ad8\u901f\u306b\u51e6\u7406<\/li>\n\n\n\n<li>\u30e1\u30e2\u30ea\u52b9\u7387\u306e\u826f\u3044\u5b9f\u88c5\u306b\u3088\u308a\u3001\u5927\u898f\u6a21\u306a\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u3082\u6271\u3048\u308b<\/li>\n<\/ul>\n\n\n\n<ol start=\"2\" class=\"wp-block-list\">\n<li><strong>\u76f4\u611f\u7684\u306aAPI\u8a2d\u8a08<\/strong><\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   require 'nokogiri'\n   require 'open-uri'\n\n   # HTML\u306e\u53d6\u5f97\u3068\u89e3\u6790\n   doc = Nokogiri::HTML(URI.open('https:\/\/example.com'))\n\n   # CSS\u30bb\u30ec\u30af\u30bf\u3067\u8981\u7d20\u3092\u53d6\u5f97\n   titles = doc.css('h1.title')\n\n   # XPath\u5f0f\u3067\u306e\u8981\u7d20\u53d6\u5f97\n   links = doc.xpath('\/\/a[@class=\"link\"]')<\/pre>\n\n\n\n<ol start=\"3\" class=\"wp-block-list\">\n<li><strong>\u8c4a\u5bcc\u306a\u691c\u7d22\u6a5f\u80fd<\/strong><\/li>\n<\/ol>\n\n\n\n<ul class=\"wp-block-list\">\n<li>CSS\u30bb\u30ec\u30af\u30bf\u3068XPath\u306e\u4e21\u65b9\u3092\u30b5\u30dd\u30fc\u30c8<\/li>\n\n\n\n<li>\u8981\u7d20\u306e\u691c\u7d22\u3001\u5c5e\u6027\u306e\u53d6\u5f97\u3001\u30c6\u30ad\u30b9\u30c8\u306e\u62bd\u51fa\u304c\u5bb9\u6613<\/li>\n\n\n\n<li>\u968e\u5c64\u69cb\u9020\u3092\u8003\u616e\u3057\u305f\u67d4\u8edf\u306a\u8981\u7d20\u6307\u5b9a\u304c\u53ef\u80fd<\/li>\n<\/ul>\n\n\n\n<ol start=\"4\" class=\"wp-block-list\">\n<li><strong>\u5f37\u529b\u306a\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u64cd\u4f5c<\/strong><\/li>\n<\/ol>\n\n\n\n<ul 
class=\"wp-block-list\">\n<li>\u8981\u7d20\u306e\u8ffd\u52a0\u3001\u524a\u9664\u3001\u7f6e\u63db\u304c\u53ef\u80fd<\/li>\n\n\n\n<li>\u5c5e\u6027\u306e\u64cd\u4f5c\u3084\u5185\u5bb9\u306e\u5909\u66f4\u304c\u5bb9\u6613<\/li>\n\n\n\n<li>DOM\u30c4\u30ea\u30fc\u306e\u8d70\u67fb\u6a5f\u80fd\u304c\u5145\u5b9f<\/li>\n<\/ul>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-2\">Nokogiri\u304c\u30b5\u30dd\u30fc\u30c8\u3059\u308b\u30d1\u30fc\u30b5\u30fc\u3068\u305d\u306e\u7279\u5fb4<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">Nokogiri\u306f\u8907\u6570\u306e\u30d1\u30fc\u30b5\u30fc\u3092\u30b5\u30dd\u30fc\u30c8\u3057\u3066\u304a\u308a\u3001\u7528\u9014\u306b\u5fdc\u3058\u3066\u9069\u5207\u306a\u3082\u306e\u3092\u9078\u629e\u3067\u304d\u307e\u3059\uff1a<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>HTML4\u30d1\u30fc\u30b5\u30fc<\/strong><\/li>\n<\/ol>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u6a19\u6e96\u7684\u306aHTML\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u306e\u89e3\u6790\u306b\u6700\u9069<\/li>\n\n\n\n<li>\u58ca\u308c\u305fHTML\u3082\u81ea\u52d5\u4fee\u6b63\u3057\u3066\u89e3\u6790<\/li>\n\n\n\n<li>\u6700\u3082\u4e00\u822c\u7684\u306b\u4f7f\u7528\u3055\u308c\u308b\u30d1\u30fc\u30b5\u30fc<\/li>\n<\/ul>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   # HTML4\u30d1\u30fc\u30b5\u30fc\u306e\u4f7f\u7528\u4f8b\n   doc = Nokogiri::HTML4(html_content)<\/pre>\n\n\n\n<ol start=\"2\" class=\"wp-block-list\">\n<li><strong>HTML5\u30d1\u30fc\u30b5\u30fc<\/strong><\/li>\n<\/ol>\n\n\n\n<ul 
class=\"wp-block-list\">\n<li>\u30e2\u30c0\u30f3\u306aHTML5\u6587\u66f8\u306e\u89e3\u6790\u306b\u5bfe\u5fdc<\/li>\n\n\n\n<li>\u3088\u308a\u53b3\u5bc6\u306aHTML5\u4ed5\u69d8\u306b\u6e96\u62e0<\/li>\n\n\n\n<li>\u65b0\u3057\u3044HTML5\u8981\u7d20\u3084\u30bb\u30de\u30f3\u30c6\u30a3\u30af\u30b9\u3092\u30b5\u30dd\u30fc\u30c8<\/li>\n<\/ul>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   # HTML5\u30d1\u30fc\u30b5\u30fc\u306e\u4f7f\u7528\u4f8b\n   doc = Nokogiri::HTML5(html_content)<\/pre>\n\n\n\n<ol start=\"3\" class=\"wp-block-list\">\n<li><strong>XML\u30d1\u30fc\u30b5\u30fc<\/strong><\/li>\n<\/ol>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u53b3\u5bc6\u306aXML\u6587\u66f8\u306e\u89e3\u6790\u306b\u4f7f\u7528<\/li>\n\n\n\n<li>\u540d\u524d\u7a7a\u9593\u306e\u30b5\u30dd\u30fc\u30c8<\/li>\n\n\n\n<li>DTD\u3084\u30b9\u30ad\u30fc\u30de\u306e\u691c\u8a3c\u304c\u53ef\u80fd<\/li>\n<\/ul>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   # XML\u30d1\u30fc\u30b5\u30fc\u306e\u4f7f\u7528\u4f8b\n   doc = Nokogiri::XML(xml_content)<\/pre>\n\n\n\n<ol start=\"4\" class=\"wp-block-list\">\n<li><strong>SAX\u30d1\u30fc\u30b5\u30fc<\/strong><\/li>\n<\/ol>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u30a4\u30d9\u30f3\u30c8\u30c9\u30ea\u30d6\u30f3\u306a\u89e3\u6790\u304c\u53ef\u80fd<\/li>\n\n\n\n<li>\u5927\u898f\u6a21\u306a\u30d5\u30a1\u30a4\u30eb\u3092\u52b9\u7387\u7684\u306b\u51e6\u7406<\/li>\n\n\n\n<li>\u30e1\u30e2\u30ea\u4f7f\u7528\u91cf\u3092\u6291\u3048\u305f\u51e6\u7406\u304c\u53ef\u80fd<\/li>\n<\/ul>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" 
data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   # SAX\u30d1\u30fc\u30b5\u30fc\u306e\u4f7f\u7528\u4f8b\n   class MyHandler &lt; Nokogiri::XML::SAX::Document\n     def start_element(name, attrs = [])\n       puts \"\u958b\u59cb\u8981\u7d20: #{name}\"\n     end\n   end\n\n   parser = Nokogiri::XML::SAX::Parser.new(MyHandler.new)\n   parser.parse(xml_content)<\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u5404\u30d1\u30fc\u30b5\u30fc\u306f\u7279\u5b9a\u306e\u30e6\u30fc\u30b9\u30b1\u30fc\u30b9\u306b\u6700\u9069\u5316\u3055\u308c\u3066\u304a\u308a\u3001\u30d7\u30ed\u30b8\u30a7\u30af\u30c8\u306e\u8981\u4ef6\u306b\u5fdc\u3058\u3066\u9069\u5207\u306a\u3082\u306e\u3092\u9078\u629e\u3059\u308b\u3053\u3068\u3067\u3001\u52b9\u7387\u7684\u306a\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u3092\u5b9f\u73fe\u3067\u304d\u307e\u3059\u3002\u7279\u306b\u3001\u4e00\u822c\u7684\u306aWeb\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u3067\u306fHTML4\u30d1\u30fc\u30b5\u30fc\u304c\u6700\u3082\u4f7f\u7528\u3055\u308c\u307e\u3059\u304c\u3001\u3088\u308a\u7279\u6b8a\u306a\u8981\u4ef6\u304c\u3042\u308b\u5834\u5408\u306f\u4ed6\u306e\u30d1\u30fc\u30b5\u30fc\u306e\u4f7f\u7528\u3092\u691c\u8a0e\u3059\u308b\u3053\u3068\u3092\u304a\u52e7\u3081\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"i-3\">\u74b0\u5883\u69cb\u7bc9\u304b\u3089\u59cb\u3081\u308bNokogiri\u5165\u9580<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-4\">gem install\u304b\u3089bundler\u3067\u306e\u7ba1\u7406\u307e\u3067<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">Nokogiri\u306e\u74b0\u5883\u69cb\u7bc9\u306b\u306f\u8907\u6570\u306e\u65b9\u6cd5\u304c\u3042\u308a\u307e\u3059\u304c\u3001\u3053\u3053\u3067\u306f\u6700\u3082\u4e00\u822c\u7684\u306a\u624b\u9806\u3092\u89e3\u8aac\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<ol 
class=\"wp-block-list\">\n<li><strong>\u76f4\u63a5\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb<\/strong><\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   # \u6700\u65b0\u7248\u306e\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\n   gem install nokogiri\n\n   # \u7279\u5b9a\u306e\u30d0\u30fc\u30b8\u30e7\u30f3\u3092\u6307\u5b9a\u3057\u3066\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\n   gem install nokogiri -v '1.15.5'<\/pre>\n\n\n\n<ol start=\"2\" class=\"wp-block-list\">\n<li><strong>Bundler\u3092\u4f7f\u7528\u3057\u305f\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb<\/strong><\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   # Gemfile\u306b\u8ffd\u52a0\n   source 'https:\/\/rubygems.org'\n\n   gem 'nokogiri'\n   # \u3082\u3057\u304f\u306f\u30d0\u30fc\u30b8\u30e7\u30f3\u3092\u6307\u5b9a\uff08\u3069\u3061\u3089\u304b\u4e00\u65b9\u306e\u307f\uff09\n   # gem 'nokogiri', '~&gt; 1.15.5'\n\n   # \u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u306e\u5b9f\u884c\n   bundle install<\/pre>\n\n\n\n<ol start=\"3\" class=\"wp-block-list\">\n<li><strong>\u30d7\u30ed\u30b8\u30a7\u30af\u30c8\u3067\u306e\u4f7f\u7528\u958b\u59cb<\/strong><\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   # Bundler\u3092\u4f7f\u7528\u3059\u308b\u5834\u5408\n   require 'bundler\/setup'\n   require 'nokogiri'\n\n   # \u76f4\u63a5require\u3059\u308b\u5834\u5408\n   require 'nokogiri'<\/pre>\n\n\n\n<ol start=\"4\" 
class=\"wp-block-list\">\n<li><strong>\u52d5\u4f5c\u78ba\u8a8d<\/strong><\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   # \u30d0\u30fc\u30b8\u30e7\u30f3\u78ba\u8a8d\n   puts Nokogiri::VERSION\n\n   # \u7c21\u5358\u306a\u89e3\u6790\u30c6\u30b9\u30c8\n   doc = Nokogiri::HTML('&lt;h1&gt;Hello, Nokogiri!&lt;\/h1&gt;')\n   puts doc.at_css('h1').text  # =&gt; \"Hello, Nokogiri!\"<\/pre>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-5\">\u3088\u304f\u3042\u308b\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u30a8\u30e9\u30fc\u3068\u305d\u306e\u89e3\u6c7a\u65b9\u6cd5<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">Nokogiri\u306e\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u6642\u306b\u306f\u3001\u30b7\u30b9\u30c6\u30e0\u306e\u74b0\u5883\u306b\u3088\u3063\u3066\u69d8\u3005\u306a\u30a8\u30e9\u30fc\u304c\u767a\u751f\u3059\u308b\u53ef\u80fd\u6027\u304c\u3042\u308a\u307e\u3059\u3002\u4ee5\u4e0b\u306b\u4e3b\u306a\u554f\u984c\u3068\u89e3\u6c7a\u65b9\u6cd5\u3092\u793a\u3057\u307e\u3059\uff1a<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>\u30cd\u30a4\u30c6\u30a3\u30d6\u30a8\u30af\u30b9\u30c6\u30f3\u30b7\u30e7\u30f3\u95a2\u9023\u306e\u30a8\u30e9\u30fc<\/strong> \u30a8\u30e9\u30fc\u30e1\u30c3\u30bb\u30fc\u30b8\u4f8b\uff1a<\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   ERROR: Failed to build gem native extension.<\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u89e3\u6c7a\u65b9\u6cd5\uff1a<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" 
data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   # Ubuntu\u306e\u5834\u5408\n   sudo apt-get install build-essential patch ruby-dev zlib1g-dev liblzma-dev\n\n   # macOS\u306e\u5834\u5408\n   xcode-select --install\n\n   # \u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u5f8c\u3001\u518d\u5ea6gem install\u3092\u5b9f\u884c\n   gem install nokogiri<\/pre>\n\n\n\n<ol start=\"2\" class=\"wp-block-list\">\n<li><strong>libxml2\/libxslt\u95a2\u9023\u306e\u30a8\u30e9\u30fc<\/strong> \u30a8\u30e9\u30fc\u30e1\u30c3\u30bb\u30fc\u30b8\u4f8b\uff1a<\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   ERROR: cannot find library 'libxml2'<\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u89e3\u6c7a\u65b9\u6cd5\uff1a<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   # Ubuntu\u306e\u5834\u5408\n   sudo apt-get install libxml2-dev libxslt-dev\n\n   # macOS\u306e\u5834\u5408\uff08Homebrew\u3092\u4f7f\u7528\uff09\n   brew install libxml2 libxslt\n\n   # \u30b7\u30b9\u30c6\u30e0\u306elibxml2\u3092\u4f7f\u7528\u3059\u308b\u5834\u5408\n   gem install nokogiri -- --use-system-libraries<\/pre>\n\n\n\n<ol start=\"3\" class=\"wp-block-list\">\n<li><strong>SSL\u8a3c\u660e\u66f8\u95a2\u9023\u306e\u30a8\u30e9\u30fc<\/strong> \u30a8\u30e9\u30fc\u30e1\u30c3\u30bb\u30fc\u30b8\u4f8b\uff1a<\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   SSL_connect returned=1 errno=0 
state=error: certificate verify failed<\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u89e3\u6c7a\u65b9\u6cd5\uff1a<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   # \u8a3c\u660e\u66f8\u306e\u66f4\u65b0\uff08RubyGems\u306e\u5834\u5408\uff09\n   gem update --system\n\n   # \u307e\u305f\u306f\u74b0\u5883\u5909\u6570\u3067\u8a3c\u660e\u66f8\u306e\u30d1\u30b9\u3092\u6307\u5b9a\n   export SSL_CERT_FILE=\/path\/to\/cacert.pem<\/pre>\n\n\n\n<ol start=\"4\" class=\"wp-block-list\">\n<li><strong>\u30e1\u30e2\u30ea\u4e0d\u8db3\u30a8\u30e9\u30fc<\/strong> \u30a8\u30e9\u30fc\u30e1\u30c3\u30bb\u30fc\u30b8\u4f8b\uff1a<\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   Failed to allocate memory (NoMemoryError)<\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u89e3\u6c7a\u65b9\u6cd5\uff1a<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   # swap\u30d5\u30a1\u30a4\u30eb\u306e\u4f5c\u6210\uff08Linux\u306e\u5834\u5408\uff09\n   sudo dd if=\/dev\/zero of=\/swapfile bs=1M count=2048\n   sudo chmod 600 \/swapfile\n   sudo mkswap \/swapfile\n   sudo swapon \/swapfile<\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u306b\u554f\u984c\u304c\u767a\u751f\u3057\u305f\u5834\u5408\u306f\u3001\u4ee5\u4e0b\u306e\u624b\u9806\u3067\u5bfe\u51e6\u3059\u308b\u3053\u3068\u3092\u304a\u52e7\u3081\u3057\u307e\u3059\uff1a<\/p>\n\n\n\n<ol 
class=\"wp-block-list\">\n<li>\u30a8\u30e9\u30fc\u30e1\u30c3\u30bb\u30fc\u30b8\u3092\u6ce8\u610f\u6df1\u304f\u8aad\u3080<\/li>\n\n\n\n<li>\u30b7\u30b9\u30c6\u30e0\u306e\u4f9d\u5b58\u95a2\u4fc2\u3092\u78ba\u8a8d<\/li>\n\n\n\n<li>\u5fc5\u8981\u306a\u30e9\u30a4\u30d6\u30e9\u30ea\u3092\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb<\/li>\n\n\n\n<li>\u74b0\u5883\u5909\u6570\u306e\u8a2d\u5b9a\u3092\u78ba\u8a8d<\/li>\n\n\n\n<li>\u5fc5\u8981\u306b\u5fdc\u3058\u3066\u30b7\u30b9\u30c6\u30e0\u3092\u518d\u8d77\u52d5<\/li>\n<\/ol>\n\n\n\n<p class=\"wp-block-paragraph\">\u3053\u308c\u3089\u306e\u624b\u9806\u3067\u89e3\u6c7a\u3057\u306a\u3044\u5834\u5408\u306f\u3001<a href=\"https:\/\/nokogiri.org\/tutorials\/installing_nokogiri.html\">Nokogiri\u306e\u516c\u5f0f\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8<\/a>\u3084<a href=\"https:\/\/github.com\/sparklemotion\/nokogiri\/issues\">GitHub\u306eIssues<\/a>\u3067\u8ffd\u52a0\u306e\u60c5\u5831\u3092\u78ba\u8a8d\u3059\u308b\u3053\u3068\u3092\u304a\u52e7\u3081\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"i-6\">Nokogiri \u306e\u57fa\u672c\u64cd\u4f5c\u30de\u30b9\u30bf\u30fc<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-7\">HTML \u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u306e\u8aad\u307f\u8fbc\u307f\u3068\u89e3\u6790<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">Nokogiri\u3067\u306f\u3001\u69d8\u3005\u306a\u65b9\u6cd5\u3067HTML\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u3092\u8aad\u307f\u8fbc\u3080\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002\u4ee5\u4e0b\u306b\u4e3b\u8981\u306a\u65b9\u6cd5\u3092\u793a\u3057\u307e\u3059\uff1a<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>\u6587\u5b57\u5217\u304b\u3089\u306e\u8aad\u307f\u8fbc\u307f<\/strong><\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   # 
HTML\u6587\u5b57\u5217\u304b\u3089\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u3092\u4f5c\u6210\n   html = '&lt;html&gt;&lt;body&gt;&lt;h1&gt;Hello World&lt;\/h1&gt;&lt;\/body&gt;&lt;\/html&gt;'\n   doc = Nokogiri::HTML(html)\n\n   # \u30a8\u30f3\u30b3\u30fc\u30c7\u30a3\u30f3\u30b0\u3092\u6307\u5b9a\u3059\u308b\u5834\u5408\n   doc = Nokogiri::HTML(html, nil, 'UTF-8')<\/pre>\n\n\n\n<ol start=\"2\" class=\"wp-block-list\">\n<li><strong>\u30d5\u30a1\u30a4\u30eb\u304b\u3089\u306e\u8aad\u307f\u8fbc\u307f<\/strong><\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   # \u30ed\u30fc\u30ab\u30eb\u30d5\u30a1\u30a4\u30eb\u3092\u8aad\u307f\u8fbc\u3080\n   doc = Nokogiri::HTML(File.open('index.html'))\n\n   # open-uri\u3092\u4f7f\u7528\u3057\u3066Web\u30da\u30fc\u30b8\u3092\u8aad\u307f\u8fbc\u3080\n   require 'open-uri'\n   doc = Nokogiri::HTML(URI.open('https:\/\/example.com'))<\/pre>\n\n\n\n<ol start=\"3\" class=\"wp-block-list\">\n<li><strong>\u30d5\u30e9\u30b0\u30e1\u30f3\u30c8\u306e\u89e3\u6790<\/strong><\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   # HTML\u30d5\u30e9\u30b0\u30e1\u30f3\u30c8\u3092\u89e3\u6790\n   fragment = Nokogiri::HTML.fragment('&lt;div&gt;\u90e8\u5206\u7684\u306aHTML&lt;\/div&gt;')\n\n   # \u30d5\u30e9\u30b0\u30e1\u30f3\u30c8\u5185\u306e\u8981\u7d20\u3092\u64cd\u4f5c\n   fragment.css('div').each do |div|\n     puts div.content\n   end<\/pre>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-8\">CSS \u30bb\u30ec\u30af\u30bf\u3092\u4f7f\u7528\u3057\u305f\u8981\u7d20\u306e\u53d6\u5f97\u30c6\u30af\u30cb\u30c3\u30af<\/h3>\n\n\n\n<p 
class=\"wp-block-paragraph\">CSS\u30bb\u30ec\u30af\u30bf\u3092\u4f7f\u7528\u3059\u308b\u3068\u3001\u76f4\u611f\u7684\u306b\u8981\u7d20\u3092\u53d6\u5f97\u3067\u304d\u307e\u3059\uff1a<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>\u57fa\u672c\u7684\u306a\u30bb\u30ec\u30af\u30bf\u306e\u4f7f\u7528<\/strong><\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   # \u30bf\u30b0\u540d\u306b\u3088\u308b\u53d6\u5f97\n   doc.css('h1')               # \u3059\u3079\u3066\u306eh1\u30bf\u30b0\n\n   # \u30af\u30e9\u30b9\u306b\u3088\u308b\u53d6\u5f97\n   doc.css('.content')         # content\u30af\u30e9\u30b9\u3092\u6301\u3064\u8981\u7d20\n\n   # ID\u306b\u3088\u308b\u53d6\u5f97\n   doc.css('#main')           # main\u3068\u3044\u3046ID\u3092\u6301\u3064\u8981\u7d20\n\n   # \u5c5e\u6027\u306b\u3088\u308b\u53d6\u5f97\n   doc.css('a[href]')         # href\u5c5e\u6027\u3092\u6301\u3064\u3059\u3079\u3066\u306ea\u30bf\u30b0\n   doc.css('img[alt=\"logo\"]') # alt\u5c5e\u6027\u304c\"logo\"\u3067\u3042\u308bimg\u8981\u7d20<\/pre>\n\n\n\n<ol start=\"2\" class=\"wp-block-list\">\n<li><strong>\u8907\u5408\u30bb\u30ec\u30af\u30bf\u306e\u6d3b\u7528<\/strong><\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   # \u5b50\u5b6b\u30bb\u30ec\u30af\u30bf\n   doc.css('div.content p')    # content\u30af\u30e9\u30b9\u306ediv\u5185\u306e\u3059\u3079\u3066\u306ep\u8981\u7d20\n\n   # \u76f4\u63a5\u306e\u5b50\u8981\u7d20\n   doc.css('ul &gt; li')         # ul\u306e\u76f4\u63a5\u306e\u5b50\u3067\u3042\u308bli\u8981\u7d20\n\n   # \u8907\u6570\u6761\u4ef6\u306e\u7d44\u307f\u5408\u308f\u305b\n   
doc.css('div.content, div.sidebar') # content\u30af\u30e9\u30b9\u307e\u305f\u306fsidebar\u30af\u30e9\u30b9\u306ediv<\/pre>\n\n\n\n<ol start=\"3\" class=\"wp-block-list\">\n<li><strong>\u8981\u7d20\u306e\u64cd\u4f5c<\/strong><\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   # \u30c6\u30ad\u30b9\u30c8\u5185\u5bb9\u306e\u53d6\u5f97\n   doc.css('h1').each do |element|\n     puts element.text        # \u30c6\u30ad\u30b9\u30c8\u5185\u5bb9\u3092\u51fa\u529b\n   end\n\n   # \u5c5e\u6027\u5024\u306e\u53d6\u5f97\n   doc.css('a').each do |link|\n     puts link['href']       # href\u5c5e\u6027\u306e\u5024\u3092\u51fa\u529b\n   end\n\n   # \u5c5e\u6027\u306e\u8a2d\u5b9a\n   doc.css('img').each do |img|\n     img['loading'] = 'lazy' # loading\u5c5e\u6027\u3092\u8a2d\u5b9a\n   end<\/pre>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-9\">XPath \u5f0f\u3092\u6d3b\u7528\u3057\u305f\u9ad8\u5ea6\u306a\u8981\u7d20\u6307\u5b9a<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">XPath\u5f0f\u3092\u4f7f\u7528\u3059\u308b\u3068\u3001\u3088\u308a\u7d30\u304b\u3044\u8981\u7d20\u306e\u6307\u5b9a\u304c\u53ef\u80fd\u3067\u3059\uff1a<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>\u57fa\u672c\u7684\u306aXPath\u5f0f<\/strong><\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   # \u7d76\u5bfe\u30d1\u30b9\n   doc.xpath('\/html\/body\/div')  # html\u306ebody\u5185\u306ediv\u3092\u53d6\u5f97\n\n   # \u76f8\u5bfe\u30d1\u30b9\n   doc.xpath('.\/\/p')            # \u73fe\u5728\u306e\u30ce\u30fc\u30c9\u304b\u3089\u898b\u3066\u4efb\u610f\u306e\u968e\u5c64\u306ep\u8981\u7d20\n\n   # 
\u5c5e\u6027\u306b\u3088\u308b\u6307\u5b9a\n   doc.xpath('\/\/div[@class=\"content\"]')  # content\u30af\u30e9\u30b9\u3092\u6301\u3064div<\/pre>\n\n\n\n<ol start=\"2\" class=\"wp-block-list\">\n<li><strong>\u9ad8\u5ea6\u306a\u6761\u4ef6\u6307\u5b9a<\/strong><\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   # \u30c6\u30ad\u30b9\u30c8\u5185\u5bb9\u306b\u3088\u308b\u6307\u5b9a\n   doc.xpath('\/\/p[contains(text(), \"\u91cd\u8981\")]')  # \"\u91cd\u8981\"\u3092\u542b\u3080p\u8981\u7d20\n\n   # \u4f4d\u7f6e\u306b\u3088\u308b\u6307\u5b9a\n   doc.xpath('\/\/ul\/li[1]')                    # \u5404ul\u306e\u6700\u521d\u306eli\u8981\u7d20\n   doc.xpath('\/\/ul\/li[last()]')               # \u5404ul\u306e\u6700\u5f8c\u306eli\u8981\u7d20\n\n   # \u8907\u6570\u6761\u4ef6\u306e\u7d44\u307f\u5408\u308f\u305b\n   doc.xpath('\/\/div[@class=\"content\" and contains(@id, \"main\")]')<\/pre>\n\n\n\n<ol start=\"3\" class=\"wp-block-list\">\n<li><strong>\u30ab\u30b9\u30bf\u30e0\u95a2\u6570\u306e\u6d3b\u7528<\/strong><\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   # \u540d\u524d\u7a7a\u9593\u306e\u767b\u9332\n   doc.xpath('\/\/custom:element', \n     'custom' =&gt; 'http:\/\/example.com\/namespace')\n\n   # \u30ab\u30b9\u30bf\u30e0XPath\u95a2\u6570\u306e\u5b9a\u7fa9\n   module MyCustomFunctions\n     def filter_by_length(nodes, min_length)\n       nodes.find_all { |node| node.content.length &gt;= min_length.to_i }\n     end\n   end\n\n   # \u95a2\u6570\u306e\u767b\u9332\u3068\u4f7f\u7528\n   Nokogiri::XML::Document.send(:include, MyCustomFunctions)\n   
doc.xpath('\/\/p[nokogiri:filter_by_length(., 100)]', Object.new.extend(MyCustomFunctions))<\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u3053\u308c\u3089\u306e\u57fa\u672c\u64cd\u4f5c\u3092\u7d44\u307f\u5408\u308f\u305b\u308b\u3053\u3068\u3067\u3001\u8907\u96d1\u306aHTML\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u304b\u3089\u5fc5\u8981\u306a\u60c5\u5831\u3092\u52b9\u7387\u7684\u306b\u62bd\u51fa\u3067\u304d\u307e\u3059\u3002\u307e\u305f\u3001CSS\u30bb\u30ec\u30af\u30bf\u3068XPath\u5f0f\u306f\u72b6\u6cc1\u306b\u5fdc\u3058\u3066\u4f7f\u3044\u5206\u3051\u308b\u3053\u3068\u3067\u3001\u3088\u308a\u67d4\u8edf\u306a\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u304c\u53ef\u80fd\u306b\u306a\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"i-10\">\u5b9f\u8df5\u7684\u306a\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u5b9f\u52d9\u30c6\u30af\u30cb\u30c3\u30af<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-11\">\u8907\u6570\u30da\u30fc\u30b8\u306e\u52b9\u7387\u7684\u306a\u30af\u30ed\u30fc\u30ea\u30f3\u30b0\u65b9\u6cd5<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">\u5927\u898f\u6a21\u306aWeb\u30b5\u30a4\u30c8\u306e\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u3067\u306f\u3001\u8907\u6570\u30da\u30fc\u30b8\u3092\u52b9\u7387\u7684\u306b\u51e6\u7406\u3059\u308b\u5fc5\u8981\u304c\u3042\u308a\u307e\u3059\u3002\u4ee5\u4e0b\u306b\u5b9f\u8df5\u7684\u306a\u30c6\u30af\u30cb\u30c3\u30af\u3092\u7d39\u4ecb\u3057\u307e\u3059\uff1a<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>\u30da\u30fc\u30b8\u30cd\u30fc\u30b7\u30e7\u30f3\u51e6\u7406<\/strong><\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   require 'nokogiri'\n   require 'open-uri'\n   require 'uri'\n   require 'set'\n\n   class PaginationCrawler\n     def initialize(base_url)\n       @base_url = base_url\n       @processed_urls = Set.new\n       @results = []\n     end\n\n 
    def crawl(max_pages: 10)\n       current_page = 1\n\n       while current_page &lt;= max_pages\n         url = \"#{@base_url}?page=#{current_page}\"\n         break unless process_page(url)\n         current_page += 1\n\n         # \u30af\u30ed\u30fc\u30eb\u9593\u9694\u3092\u8a2d\u5b9a\n         sleep(1)\n       end\n\n       @results\n     end\n\n     private\n\n     def process_page(url)\n       return false if @processed_urls.include?(url)\n\n       doc = Nokogiri::HTML(URI.open(url))\n       @processed_urls.add(url)\n\n       # \u30da\u30fc\u30b8\u5185\u306e\u30c7\u30fc\u30bf\u3092\u62bd\u51fa\n       items = doc.css('.item').map do |item|\n         {\n           title: item.at_css('.title')&amp;.text&amp;.strip,\n           price: item.at_css('.price')&amp;.text&amp;.strip\n         }\n       end\n\n       @results.concat(items)\n\n       # \u6b21\u306e\u30da\u30fc\u30b8\u304c\u5b58\u5728\u3059\u308b\u304b\u78ba\u8a8d\n       !!doc.at_css('.next-page')\n     rescue OpenURI::HTTPError =&gt; e\n       puts \"\u30da\u30fc\u30b8\u53d6\u5f97\u30a8\u30e9\u30fc: #{url} - #{e.message}\"\n       false\n     end\n   end<\/pre>\n\n\n\n<ol start=\"2\" class=\"wp-block-list\">\n<li><strong>\u4e26\u884c\u51e6\u7406\u306b\u3088\u308b\u9ad8\u901f\u5316<\/strong><\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   require 'parallel'\n\n   def parallel_crawl(urls, max_concurrency: 3)\n     Parallel.map(urls, in_processes: max_concurrency) do |url|\n       begin\n         doc = Nokogiri::HTML(URI.open(url))\n         # \u30c7\u30fc\u30bf\u62bd\u51fa\u51e6\u7406\n         {\n           url: url,\n           data: extract_data(doc),\n           status: 'success'\n         }\n       rescue =&gt; e\n         {\n           url: url,\n           error: e.message,\n           
status: 'error'\n         }\n       end\n     end\n   end<\/pre>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-12\">\u52d5\u7684\u30b3\u30f3\u30c6\u30f3\u30c4\u3078\u306e\u5bfe\u5fdc\u7b56<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">JavaScript\u3067\u52d5\u7684\u306b\u751f\u6210\u3055\u308c\u308b\u30b3\u30f3\u30c6\u30f3\u30c4\u3078\u306e\u5bfe\u5fdc\u65b9\u6cd5\u3092\u8aac\u660e\u3057\u307e\u3059\uff1a<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>API\u30a8\u30f3\u30c9\u30dd\u30a4\u30f3\u30c8\u306e\u6d3b\u7528<\/strong><\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   require 'json'\n   require 'net\/http'\n\n   def fetch_api_data(api_url, params = {})\n     uri = URI(api_url)\n     uri.query = URI.encode_www_form(params)\n\n     response = Net::HTTP.get_response(uri)\n\n     if response.is_a?(Net::HTTPSuccess)\n       JSON.parse(response.body)\n     else\n       raise \"API\u547c\u3073\u51fa\u3057\u30a8\u30e9\u30fc: #{response.code}\"\n     end\n   end<\/pre>\n\n\n\n<ol start=\"2\" class=\"wp-block-list\">\n<li><strong>\u30d8\u30c3\u30c9\u30ec\u30b9\u30d6\u30e9\u30a6\u30b6\u3068\u306e\u9023\u643a<\/strong><\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   require 'selenium-webdriver'\n\n   def scrape_dynamic_content(url)\n     options = Selenium::WebDriver::Chrome::Options.new\n     options.add_argument('--headless')\n\n     driver = Selenium::WebDriver.for(:chrome, options: options)\n\n     begin\n       driver.get(url)\n       # JavaScript\u306e\u5b9f\u884c\u5b8c\u4e86\u3092\u5f85\u6a5f\n       wait = 
Selenium::WebDriver::Wait.new(timeout: 10)\n       wait.until { driver.execute_script('return document.readyState') == 'complete' }\n\n       # HTML\u53d6\u5f97\u3068Nokogiri\u89e3\u6790\n       doc = Nokogiri::HTML(driver.page_source)\n       extract_data(doc)\n     ensure\n       driver.quit\n     end\n   end<\/pre>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-13\">\u30d0\u30ea\u30c7\u30fc\u30b7\u30e7\u30f3\u3068\u30a8\u30e9\u30fc\u30cf\u30f3\u30c9\u30ea\u30f3\u30b0<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">\u5805\u7262\u306a\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u30b7\u30b9\u30c6\u30e0\u306b\u306f\u3001\u9069\u5207\u306a\u30d0\u30ea\u30c7\u30fc\u30b7\u30e7\u30f3\u3068\u30a8\u30e9\u30fc\u51e6\u7406\u304c\u4e0d\u53ef\u6b20\u3067\u3059\uff1a<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>\u30c7\u30fc\u30bf\u30d0\u30ea\u30c7\u30fc\u30b7\u30e7\u30f3<\/strong><\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   class DataValidator\n     def self.validate_item(item)\n       errors = []\n\n       errors &lt;&lt; \"\u30bf\u30a4\u30c8\u30eb\u304c\u7a7a\u3067\u3059\" if item[:title].nil? 
|| item[:title].empty?\n       errors &lt;&lt; \"\u4fa1\u683c\u304c\u4e0d\u6b63\u3067\u3059\" unless valid_price?(item[:price])\n       errors &lt;&lt; \"URL\u304c\u4e0d\u6b63\u3067\u3059\" unless valid_url?(item[:url])\n\n       {\n         valid: errors.empty?,\n         errors: errors,\n         data: item\n       }\n     end\n\n     private\n\n     def self.valid_price?(price)\n       return false unless price.is_a?(String)\n       price.match?(\/^\\d+,?\\d*\u5186$\/)\n     end\n\n     def self.valid_url?(url)\n       uri = URI.parse(url)\n       uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)\n     rescue URI::InvalidURIError\n       false\n     end\n   end<\/pre>\n\n\n\n<ol start=\"2\" class=\"wp-block-list\">\n<li><strong>\u30a8\u30e9\u30fc\u51e6\u7406\u3068\u30ea\u30c8\u30e9\u30a4\u6a5f\u80fd<\/strong><\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   class ResilientScraper\n     MAX_RETRIES = 3\n     RETRY_DELAY = 2 # \u79d2\n\n     def scrape_with_retry(url)\n       retries = 0\n\n       begin\n         doc = Nokogiri::HTML(URI.open(url))\n         process_document(doc)\n       rescue OpenURI::HTTPError =&gt; e\n         if e.message =~ \/429|5\\d\\d\/ &amp;&amp; retries &lt; MAX_RETRIES\n           retries += 1\n           sleep(RETRY_DELAY * retries)\n           retry\n         else\n           raise\n         end\n       rescue StandardError =&gt; e\n         log_error(url, e)\n         raise\n       end\n     end\n\n     private\n\n     def log_error(url, error)\n       # \u30a8\u30e9\u30fc\u30ed\u30b0\u306e\u8a18\u9332\n       puts \"[#{Time.now}] Error scraping #{url}: #{error.message}\"\n       puts error.backtrace.join(\"\\n\")\n     end\n   end<\/pre>\n\n\n\n<p 
class=\"wp-block-paragraph\">\u3053\u308c\u3089\u306e\u30c6\u30af\u30cb\u30c3\u30af\u3092\u7d44\u307f\u5408\u308f\u305b\u308b\u3053\u3068\u3067\u3001\u5b9f\u904b\u7528\u306b\u8010\u3048\u3046\u308b\u5805\u7262\u306a\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u30b7\u30b9\u30c6\u30e0\u3092\u69cb\u7bc9\u3067\u304d\u307e\u3059\u3002\u7279\u306b\u3001\u30a8\u30e9\u30fc\u51e6\u7406\u3068\u30d0\u30ea\u30c7\u30fc\u30b7\u30e7\u30f3\u306f\u672c\u756a\u74b0\u5883\u3067\u306e\u5b89\u5b9a\u6027\u3092\u78ba\u4fdd\u3059\u308b\u4e0a\u3067\u91cd\u8981\u306a\u5f79\u5272\u3092\u679c\u305f\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"i-14\">\u30d1\u30d5\u30a9\u30fc\u30de\u30f3\u30b9\u3068\u30b9\u30b1\u30fc\u30e9\u30d3\u30ea\u30c6\u30a3\u306e\u6539\u5584<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-15\">\u30e1\u30e2\u30ea\u4f7f\u7528\u91cf\u306e\u6700\u9069\u5316\u30c6\u30af\u30cb\u30c3\u30af<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">\u5927\u898f\u6a21\u306aHTML\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u3092\u51e6\u7406\u3059\u308b\u969b\u306f\u3001\u30e1\u30e2\u30ea\u4f7f\u7528\u91cf\u306e\u6700\u9069\u5316\u304c\u91cd\u8981\u3067\u3059\u3002\u4ee5\u4e0b\u306b\u52b9\u679c\u7684\u306a\u624b\u6cd5\u3092\u7d39\u4ecb\u3057\u307e\u3059\uff1a<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>\u30b9\u30c8\u30ea\u30fc\u30e0\u51e6\u7406\u306e\u6d3b\u7528<\/strong><\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   require 'nokogiri'\n\n   class StreamParser &lt; Nokogiri::XML::SAX::Document\n     def initialize\n       @current_depth = 0\n       @data = []\n     end\n\n     def start_element(name, attributes = [])\n       @current_depth += 1\n\n       # \u7279\u5b9a\u306e\u8981\u7d20\u306e\u307f\u3092\u51e6\u7406\n       if name == 'article' &amp;&amp; 
@current_depth == 2\n         @current_article = {}\n         attributes.each do |key, value|\n           @current_article[key] = value\n         end\n       end\n     end\n\n     def end_element(name)\n       if name == 'article' &amp;&amp; @current_depth == 2\n         process_article(@current_article)\n         @current_article = nil\n       end\n       @current_depth -= 1\n     end\n\n     private\n\n     def process_article(article)\n       # \u5fc5\u8981\u306a\u51e6\u7406\u306e\u307f\u3092\u5b9f\u884c\n       @data &lt;&lt; article\n     end\n   end\n\n   # \u4f7f\u7528\u4f8b\n   parser = Nokogiri::XML::SAX::Parser.new(StreamParser.new)\n   parser.parse(File.open(\"large_file.xml\"))<\/pre>\n\n\n\n<ol start=\"2\" class=\"wp-block-list\">\n<li><strong>\u30e1\u30e2\u30ea\u89e3\u653e\u306e\u660e\u793a\u7684\u5236\u5fa1<\/strong><\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   class MemoryEfficientParser\n     def process_large_document(file_path)\n       batch_size = 1000\n       current_batch = []\n\n       File.open(file_path) do |file|\n         doc = Nokogiri::HTML(file)\n\n         doc.css('article').each do |article|\n           current_batch &lt;&lt; extract_data(article)\n\n           if current_batch.size &gt;= batch_size\n             process_batch(current_batch)\n             current_batch = []\n             GC.start # \u660e\u793a\u7684\u306a\u30ac\u30d9\u30fc\u30b8\u30b3\u30ec\u30af\u30b7\u30e7\u30f3\n           end\n         end\n\n         # \u6b8b\u308a\u306e\u30d0\u30c3\u30c1\u3092\u51e6\u7406\n         process_batch(current_batch) unless current_batch.empty?\n       end\n     end\n\n     private\n\n     def extract_data(article)\n       # \u5fc5\u8981\u306a\u30c7\u30fc\u30bf\u306e\u307f\u62bd\u51fa\n       {\n         title: 
article.at_css('h1')&amp;.text,\n         content: article.at_css('p')&amp;.text\n       }\n     end\n\n     def process_batch(batch)\n       # \u30d0\u30c3\u30c1\u51e6\u7406\u306e\u5b9f\u88c5\n       batch.each do |data|\n         # \u30c7\u30fc\u30bf\u306e\u4fdd\u5b58\u3084\u52a0\u5de5\n       end\n     end\n   end<\/pre>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-16\">\u30e1\u30e2\u30ea\u51e6\u7406\u306b\u3088\u308b\u9ad8\u901f\u5316\u306e\u5b9f\u73fe<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">\u30d1\u30d5\u30a9\u30fc\u30de\u30f3\u30b9\u3092\u6700\u5927\u9650\u306b\u5f15\u304d\u51fa\u3059\u305f\u3081\u306e\u9ad8\u5ea6\u306a\u6700\u9069\u5316\u30c6\u30af\u30cb\u30c3\u30af\u3092\u7d39\u4ecb\u3057\u307e\u3059\uff1a<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>\u4e26\u5217\u51e6\u7406\u306e\u6700\u9069\u5316<\/strong><\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   require 'parallel'\n   require 'oj' # \u9ad8\u901f\u306aJSON\u30d1\u30fc\u30b5\u30fc\n\n   class ParallelProcessor\n     def initialize(worker_count = 4)\n       @worker_count = worker_count\n       @mutex = Mutex.new\n       @results = []\n     end\n\n     def process_documents(urls)\n       Parallel.each(urls, in_threads: @worker_count) do |url|\n         result = process_single_document(url)\n\n         @mutex.synchronize do\n           @results &lt;&lt; result\n         end\n       end\n\n       @results\n     end\n\n     private\n\n     def process_single_document(url)\n       doc = Nokogiri::HTML(URI.open(url))\n\n       # \u30e1\u30e2\u30ea\u52b9\u7387\u306e\u826f\u3044\u30c7\u30fc\u30bf\u62bd\u51fa\n       {\n         url: url,\n         data: extract_minimal_data(doc)\n       }\n     end\n\n     def extract_minimal_data(doc)\n       # 
\u5fc5\u8981\u6700\u5c0f\u9650\u306e\u30c7\u30fc\u30bf\u306e\u307f\u3092\u62bd\u51fa\n       doc.css('target_element').map do |element|\n         element.text.strip\n       end\n     ensure\n       # \u660e\u793a\u7684\u306b\u30e1\u30e2\u30ea\u3092\u89e3\u653e\n       doc = nil\n       GC.start\n     end\n   end<\/pre>\n\n\n\n<ol start=\"2\" class=\"wp-block-list\">\n<li><strong>\u30ad\u30e3\u30c3\u30b7\u30e5\u306e\u6d3b\u7528<\/strong><\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   require 'redis'\n\n   class CachedParser\n     def initialize\n       @redis = Redis.new\n       @cache_ttl = 3600 # 1\u6642\u9593\n     end\n\n     def parse_with_cache(url)\n       cache_key = \"parsed_content:#{Digest::MD5.hexdigest(url)}\"\n\n       # \u30ad\u30e3\u30c3\u30b7\u30e5\u306e\u78ba\u8a8d\n       if cached = @redis.get(cache_key)\n         return Oj.load(cached)\n       end\n\n       # \u65b0\u898f\u30d1\u30fc\u30b9\n       result = parse_fresh_content(url)\n\n       # \u30ad\u30e3\u30c3\u30b7\u30e5\u306e\u4fdd\u5b58\n       @redis.setex(cache_key, @cache_ttl, Oj.dump(result))\n\n       result\n     end\n\n     private\n\n     def parse_fresh_content(url)\n       doc = Nokogiri::HTML(URI.open(url))\n\n       # \u30e1\u30e2\u30ea\u52b9\u7387\u3092\u8003\u616e\u3057\u305f\u30d1\u30fc\u30b9\u51e6\u7406\n       {\n         title: doc.at_css('title')&amp;.text,\n         content: doc.at_css('main')&amp;.text,\n         timestamp: Time.now.to_i\n       }\n     end\n   end<\/pre>\n\n\n\n<ol start=\"3\" class=\"wp-block-list\">\n<li><strong>\u30d7\u30ed\u30d5\u30a1\u30a4\u30ea\u30f3\u30b0\u3068\u30e2\u30cb\u30bf\u30ea\u30f3\u30b0<\/strong><\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" 
data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   require 'memory_profiler'\n\n   class ProfilingParser\n     def self.profile_parsing(url)\n       report = MemoryProfiler.report do\n         doc = Nokogiri::HTML(URI.open(url))\n         yield(doc) if block_given?\n       end\n\n       # \u30e1\u30e2\u30ea\u4f7f\u7528\u72b6\u6cc1\u306e\u30ec\u30dd\u30fc\u30c8\u51fa\u529b\n       report.pretty_print(to_file: 'memory_profile.txt')\n     end\n\n     def self.monitor_memory_usage\n       initial_memory = GetProcessMem.new.mb\n\n       yield if block_given?\n\n       final_memory = GetProcessMem.new.mb\n       memory_difference = final_memory - initial_memory\n\n       puts \"\u30e1\u30e2\u30ea\u4f7f\u7528\u91cf\u306e\u5909\u5316: #{memory_difference.round(2)}MB\"\n     end\n   end<\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u3053\u308c\u3089\u306e\u6700\u9069\u5316\u30c6\u30af\u30cb\u30c3\u30af\u3092\u9069\u5207\u306b\u7d44\u307f\u5408\u308f\u305b\u308b\u3053\u3068\u3067\u3001\u5927\u898f\u6a21\u306a\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u30d7\u30ed\u30b8\u30a7\u30af\u30c8\u3067\u3082\u5b89\u5b9a\u3057\u305f\u30d1\u30d5\u30a9\u30fc\u30de\u30f3\u30b9\u3092\u5b9f\u73fe\u3067\u304d\u307e\u3059\u3002\u7279\u306b\u3001\u30e1\u30e2\u30ea\u4f7f\u7528\u91cf\u306e\u5236\u5fa1\u3068\u30ad\u30e3\u30c3\u30b7\u30e5\u6226\u7565\u306f\u3001\u904b\u7528\u74b0\u5883\u3067\u306e\u5b89\u5b9a\u6027\u3092\u78ba\u4fdd\u3059\u308b\u4e0a\u3067\u91cd\u8981\u306a\u8981\u7d20\u3068\u306a\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"i-17\">\u30bb\u30ad\u30e5\u30ea\u30c6\u30a3\u3068\u30de\u30ca\u30fc\u306e\u9069\u5207\u4e8b\u9805<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-18\">\u6b63\u3057\u3044\u30ea\u30af\u30a8\u30b9\u30c8\u5bfe\u7b56\u306e\u8a2d\u5b9a<\/h3>\n\n\n\n<p 
class=\"wp-block-paragraph\">Web\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u3092\u884c\u3046\u969b\u306f\u3001\u5bfe\u8c61\u30b5\u30a4\u30c8\u3078\u306e\u8ca0\u8377\u3068\u30bb\u30ad\u30e5\u30ea\u30c6\u30a3\u3092\u8003\u616e\u3057\u305f\u9069\u5207\u306a\u8a2d\u5b9a\u304c\u5fc5\u8981\u3067\u3059\uff1a<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>\u30ea\u30af\u30a8\u30b9\u30c8\u30d8\u30c3\u30c0\u30fc\u306e\u9069\u5207\u306a\u8a2d\u5b9a<\/strong><\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   require 'nokogiri'\n   require 'open-uri'\n\n   class ResponsibleScraper\n     def initialize\n       @headers = {\n         'User-Agent' =&gt; 'MyBot\/1.0 (contact@example.com)',\n         'Accept' =&gt; 'text\/html,application\/xhtml+xml,application\/xml',\n         'Accept-Language' =&gt; 'ja,en-US;q=0.9,en;q=0.8'\n       }\n     end\n\n     def fetch_page(url)\n       URI.open(url, @headers) do |f|\n         Nokogiri::HTML(f)\n       end\n     rescue OpenURI::HTTPError =&gt; e\n       handle_http_error(e, url)\n     end\n\n     private\n\n     def handle_http_error(error, url)\n       case error.message\n       when \/429\/\n         puts \"\u30ec\u30fc\u30c8\u5236\u9650\u306b\u9054\u3057\u307e\u3057\u305f: #{url}\"\n         sleep(300) # 5\u5206\u5f85\u6a5f\n       when \/403\/\n         puts \"\u30a2\u30af\u30bb\u30b9\u304c\u62d2\u5426\u3055\u308c\u307e\u3057\u305f: #{url}\"\n       else\n         puts \"\u30a8\u30e9\u30fc\u304c\u767a\u751f\u3057\u307e\u3057\u305f: #{error.message}\"\n       end\n       nil\n     end\n   end<\/pre>\n\n\n\n<ol start=\"2\" class=\"wp-block-list\">\n<li><strong>\u30a2\u30af\u30bb\u30b9\u983b\u5ea6\u306e\u5236\u5fa1<\/strong><\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" 
data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   class RateLimiter\n     def initialize(requests_per_minute: 20)\n       @interval = 60.0 \/ requests_per_minute\n       @last_request = Time.now - @interval\n     end\n\n     def throttle\n       wait_time = @interval - (Time.now - @last_request)\n       sleep(wait_time) if wait_time &gt; 0\n       @last_request = Time.now\n\n       yield if block_given?\n     end\n   end\n\n   # \u4f7f\u7528\u4f8b\n   limiter = RateLimiter.new(requests_per_minute: 30)\n   urls.each do |url|\n     limiter.throttle do\n       # \u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u51e6\u7406\n     end\n   end<\/pre>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-19\">robots.txt\u306e\u5c0a\u91cd\u3068\u30b5\u30a4\u30c8\u30dd\u30ea\u30b7\u30fc\u306e\u78ba\u8a8d<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">Web\u30b5\u30a4\u30c8\u306e\u30dd\u30ea\u30b7\u30fc\u3092\u5c0a\u91cd\u3059\u308b\u3053\u3068\u306f\u3001\u502b\u7406\u7684\u306a\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u306e\u57fa\u672c\u3067\u3059\uff1a<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>robots.txt\u306e\u89e3\u6790\u3068\u9075\u5b88<\/strong><\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   require 'robotstxt'\n\n   class PolicyCompliantScraper\n     def initialize(base_url)\n       @base_url = base_url\n       @parser = initialize_robots_parser\n     end\n\n     def can_crawl?(url)\n       return false unless @parser\n\n       path = URI.parse(url).path\n       @parser.allowed?(path, user_agent: 'MyBot\/1.0')\n     end\n\n     private\n\n     def initialize_robots_parser\n       robots_url = URI.join(@base_url, '\/robots.txt')\n       
robots_content = URI.open(robots_url).read\n       Robotstxt.parse(robots_content, user_agent: 'MyBot\/1.0')\n     rescue OpenURI::HTTPError\n       puts \"robots.txt\u304c\u898b\u3064\u304b\u308a\u307e\u305b\u3093\u3067\u3057\u305f: #{@base_url}\"\n       nil\n     end\n   end\n\n   # \u4f7f\u7528\u4f8b\n   scraper = PolicyCompliantScraper.new('https:\/\/example.com')\n   if scraper.can_crawl?('\/articles\/123')\n     # \u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u51e6\u7406\n   end<\/pre>\n\n\n\n<ol start=\"2\" class=\"wp-block-list\">\n<li><strong>\u30b5\u30a4\u30c8\u30dd\u30ea\u30b7\u30fc\u306e\u78ba\u8a8d\u3068\u9075\u5b88<\/strong><\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   class SitePolicy\n     def self.check_terms_of_service(url)\n       domain = URI.parse(url).host\n       tos_paths = ['\/terms', '\/tos', '\/terms-of-service']\n\n       tos_paths.each do |path|\n         tos_url = \"https:\/\/#{domain}#{path}\"\n         begin\n           response = URI.open(tos_url)\n           puts \"\u5229\u7528\u898f\u7d04\u3092\u78ba\u8a8d\u3057\u3066\u304f\u3060\u3055\u3044: #{tos_url}\"\n           return true\n         rescue OpenURI::HTTPError\n           next\n         end\n       end\n\n       puts \"\u5229\u7528\u898f\u7d04\u304c\u898b\u3064\u304b\u308a\u307e\u305b\u3093\u3067\u3057\u305f\u3002\u30b5\u30a4\u30c8\u7ba1\u7406\u8005\u306b\u78ba\u8a8d\u3059\u308b\u3053\u3068\u3092\u304a\u52e7\u3081\u3057\u307e\u3059\u3002\"\n       false\n     end\n\n     def self.validate_content_usage(content)\n       restricted_patterns = [\n         \/confidential\/i,\n         \/private\/i,\n         \/proprietary\/i\n       ]\n\n       restricted_patterns.each do |pattern|\n         if content.match?(pattern)\n           raise 
\"\u5236\u9650\u4ed8\u304d\u30b3\u30f3\u30c6\u30f3\u30c4\u304c\u691c\u51fa\u3055\u308c\u307e\u3057\u305f\"\n         end\n       end\n     end\n   end<\/pre>\n\n\n\n<ol start=\"3\" class=\"wp-block-list\">\n<li><strong>\u30a8\u30e9\u30fc\u6642\u306e\u9069\u5207\u306a\u5bfe\u5fdc<\/strong><\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   class EthicalScraper\n     MAX_RETRIES = 3\n     BACKOFF_FACTOR = 2\n\n     def scrape_with_respect(url)\n       retries = 0\n\n       begin\n         return unless SitePolicy.check_terms_of_service(url)\n\n         doc = ResponsibleScraper.new.fetch_page(url)\n         content = extract_content(doc)\n         SitePolicy.validate_content_usage(content)\n\n         content\n       rescue =&gt; e\n         retries += 1\n         if retries &lt;= MAX_RETRIES\n           sleep(BACKOFF_FACTOR ** retries)\n           retry\n         else\n           log_error(url, e)\n           nil\n         end\n       end\n     end\n\n     private\n\n     def log_error(url, error)\n       File.open('scraping_errors.log', 'a') do |f|\n         f.puts \"[#{Time.now}] Error scraping #{url}: #{error.message}\"\n       end\n     end\n   end<\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u3053\u308c\u3089\u306e\u5bfe\u7b56\u3092\u5b9f\u88c5\u3059\u308b\u3053\u3068\u3067\u3001\u5bfe\u8c61\u30b5\u30a4\u30c8\u306b\u8ca0\u8377\u3092\u304b\u3051\u305a\u306b\u3001\u304b\u3064\u502b\u7406\u7684\u306a\u65b9\u6cd5\u3067\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u3092\u5b9f\u65bd\u3067\u304d\u307e\u3059\u3002\u307e\u305f\u3001\u9069\u5207\u306a\u30a8\u30e9\u30fc\u51e6\u7406\u3068\u30ed\u30b0\u8a18\u9332\u306b\u3088\u308a\u3001\u554f\u984c\u304c\u767a\u751f\u3057\u305f\u969b\u306e\u5bfe\u5fdc\u3082\u5bb9\u6613\u306b\u306a\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<h2 
class=\"wp-block-heading\" id=\"i-20\">\u30c8\u30e9\u30d6\u30eb\u30b7\u30e5\u30fc\u30c6\u30a3\u30f3\u30b0\u30ac\u30a4\u30c9<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-21\">\u30a8\u30f3\u30b3\u30fc\u30c9\u95a2\u9023\u306e\u554f\u984c\u89e3\u6c7a<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">Nokogiri\u3067\u3088\u304f\u906d\u9047\u3059\u308b\u30a8\u30f3\u30b3\u30fc\u30c7\u30a3\u30f3\u30b0\u554f\u984c\u3068\u305d\u306e\u89e3\u6c7a\u65b9\u6cd5\u3092\u8aac\u660e\u3057\u307e\u3059\uff1a<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>\u6587\u5b57\u5316\u3051\u3078\u306e\u5bfe\u51e6<\/strong><\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   require 'nokogiri'\n   require 'open-uri'\n\n   # \u57fa\u672c\u7684\u306a\u6587\u5b57\u30b3\u30fc\u30c9\u5bfe\u5fdc\n   html = URI.open('https:\/\/example.jp').read\n   doc = Nokogiri::HTML(html, nil, 'Shift_JIS')\n\n   # \u6587\u5b57\u30b3\u30fc\u30c9\u3092\u660e\u793a\u7684\u306b\u6307\u5b9a\u3059\u308b\u5834\u5408\n   doc = Nokogiri::HTML(html.force_encoding('Shift_JIS').encode('UTF-8'))\n\n   # \u30e1\u30bf\u30bf\u30b0\u304b\u3089\u6587\u5b57\u30b3\u30fc\u30c9\u3092\u5224\u5b9a\u3059\u308b\u5834\u5408\n   doc = Nokogiri::HTML(html) do |config|\n     config.strict.noent\n   end\n   charset = doc.at_css('meta[charset]')&amp;.[]('charset') ||\n             doc.at_css('meta[http-equiv=\"Content-Type\"]')&amp;.[]('content')&amp;.match(\/charset=(.+?)($|;)\/i)&amp;.[](1)<\/pre>\n\n\n\n<ol start=\"2\" class=\"wp-block-list\">\n<li><strong>\u30a8\u30f3\u30b3\u30fc\u30c7\u30a3\u30f3\u30b0\u30a8\u30e9\u30fc\u306e\u89e3\u6c7a\u4f8b<\/strong><\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" 
data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   def safe_encode(text, from_encoding = 'Shift_JIS')\n     text.force_encoding(from_encoding)\n         .encode('UTF-8', \n                invalid: :replace, \n                undef: :replace, \n                replace: '?')\n   rescue Encoding::InvalidByteSequenceError\n     text.force_encoding('UTF-8')\n   end\n\n   # \u4f7f\u7528\u4f8b\n   html = URI.open('https:\/\/example.jp').read\n   safe_html = safe_encode(html)\n   doc = Nokogiri::HTML(safe_html)<\/pre>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-22\">\u30d1\u30fc\u30b9\u5931\u6557\u6642\u306e\u5bfe\u51e6\u6cd5<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">HTML\u306e\u30d1\u30fc\u30b9\u306b\u5931\u6557\u3057\u305f\u969b\u306e\u4e00\u822c\u7684\u306a\u5bfe\u51e6\u65b9\u6cd5\u3092\u7d39\u4ecb\u3057\u307e\u3059\uff1a<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>\u58ca\u308c\u305fHTML\u306e\u4fee\u6b63<\/strong><\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   # \u4e0d\u6b63\u306a\u30bf\u30b0\u306e\u51e6\u7406\n   html = html.gsub(\/&lt;\\\/?[^&gt;]*&gt;\/) do |tag|\n     if tag =~ \/&lt;\\\/?(?:div|p|span|a|img|h[1-6]|ul|ol|li)[\\s&gt;]\/\n       tag\n     else\n       ''  # \u4e0d\u660e\u306a\u30bf\u30b0\u3092\u524a\u9664\n     end\n   end\n\n   # \u9589\u3058\u30bf\u30b0\u306e\u88dc\u5b8c\n   html = html.gsub(\/&lt;(div|p|span)((?!&gt;).)*?&gt;(?!.*?&lt;\\\/\\1&gt;)\/) do |match|\n     \"#{match}&lt;\/#{$1}&gt;\"\n   end\n\n   doc = Nokogiri::HTML(html)<\/pre>\n\n\n\n<ol start=\"2\" class=\"wp-block-list\">\n<li><strong>\u4e00\u822c\u7684\u306a\u30a8\u30e9\u30fc\u3068\u5bfe\u51e6\u6cd5<\/strong><\/li>\n<\/ol>\n\n\n\n<ul 
class=\"wp-block-list\">\n<li><strong>\u7a7a\u306e\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u30a8\u30e9\u30fc<\/strong> <code>def parse_with_validation(html) doc = Nokogiri::HTML(html) if doc.css('body').empty? raise \"\u6709\u52b9\u306aHTML\u5185\u5bb9\u304c\u898b\u3064\u304b\u308a\u307e\u305b\u3093\" end doc rescue =&gt; e puts \"\u30d1\u30fc\u30b9\u30a8\u30e9\u30fc: #{e.message}\" nil end<\/code><\/li>\n\n\n\n<li><strong>\u7121\u52b9\u306a\u30bb\u30ec\u30af\u30bf\u30a8\u30e9\u30fc<\/strong> <code>def safe_css_select(doc, selector) doc.css(selector) rescue Nokogiri::CSS::SyntaxError =&gt; e puts \"\u7121\u52b9\u306aCSS\u30bb\u30ec\u30af\u30bf: #{selector}\" puts \"\u30a8\u30e9\u30fc: #{e.message}\" [] # \u30a8\u30e9\u30fc\u6642\u306f\u7a7a\u914d\u5217\u3092\u8fd4\u3059 end # \u4f7f\u7528\u4f8b elements = safe_css_select(doc, 'div.content &gt; p')<\/code><\/li>\n<\/ul>\n\n\n\n<ol start=\"3\" class=\"wp-block-list\">\n<li><strong>\u3088\u304f\u3042\u308b\u30c8\u30e9\u30d6\u30eb\u3068\u89e3\u6c7a\u7b56<\/strong><\/li>\n<\/ol>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u30bb\u30ec\u30af\u30bf\u304c\u8981\u7d20\u3092\u53d6\u5f97\u3067\u304d\u306a\u3044 <code># \u554f\u984c\u306e\u3042\u308b\u4f8b doc.css('.specific-class') # \u8981\u7d20\u304c\u898b\u3064\u304b\u3089\u306a\u3044 # \u89e3\u6c7a\u7b56\uff1a\u968e\u5c64\u3092\u78ba\u8a8d puts doc.at_css('.specific-class')&amp;.parent&amp;.to_html # \u89e3\u6c7a\u7b56\uff1a\u30af\u30e9\u30b9\u540d\u306e\u5b8c\u5168\u4e00\u81f4\u3092\u78ba\u8a8d doc.css('[class=\"specific-class\"]')<\/code><\/li>\n\n\n\n<li>\u52d5\u7684\u30b3\u30f3\u30c6\u30f3\u30c4\u304c\u53d6\u5f97\u3067\u304d\u306a\u3044 <code># JavaScript\u3067\u751f\u6210\u3055\u308c\u308b\u5185\u5bb9\u306f\u901a\u5e38\u306eNokogiri\u3067\u306f\u53d6\u5f97\u3067\u304d\u306a\u3044 # \u89e3\u6c7a\u7b561: API\u3092\u4f7f\u7528 require 'json' require 'net\/http' response = Net::HTTP.get(URI('https:\/\/api.example.com\/data')) data = JSON.parse(response) # \u89e3\u6c7a\u7b562: 
Selenium\u306a\u3069\u306e\u30d8\u30c3\u30c9\u30ec\u30b9\u30d6\u30e9\u30a6\u30b6\u3092\u4f7f\u7528 require 'selenium-webdriver' driver = Selenium::WebDriver.for :chrome, options: options driver.get(url) html = driver.page_source doc = Nokogiri::HTML(html)<\/code><\/li>\n<\/ul>\n\n\n\n<p class=\"wp-block-paragraph\">\u30c7\u30d0\u30c3\u30b0\u306e\u305f\u3081\u306e\u30d2\u30f3\u30c8\uff1a<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li>\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u306e\u69cb\u9020\u3092\u78ba\u8a8d<\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   # HTML\u306e\u69cb\u9020\u3092\u78ba\u8a8d\n   puts doc.to_html\n\n   # \u7279\u5b9a\u306e\u8981\u7d20\u306e\u5468\u8fba\u69cb\u9020\u3092\u78ba\u8a8d\n   element = doc.at_css('.target')\n   puts element&amp;.parent&amp;.to_html<\/pre>\n\n\n\n<ol start=\"2\" class=\"wp-block-list\">\n<li>\u30a8\u30e9\u30fc\u306e\u8a73\u7d30\u3092\u53d6\u5f97<\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   begin\n     doc = Nokogiri::HTML(html)\n     result = doc.css('selector').text\n   rescue =&gt; e\n     puts \"\u30a8\u30e9\u30fc\u30bf\u30a4\u30d7: #{e.class}\"\n     puts \"\u30a8\u30e9\u30fc\u30e1\u30c3\u30bb\u30fc\u30b8: #{e.message}\"\n     puts \"\u30d0\u30c3\u30af\u30c8\u30ec\u30fc\u30b9: #{e.backtrace.join(\"\\n\")}\"\n   end<\/pre>\n\n\n\n<p 
class=\"wp-block-paragraph\">\u3053\u308c\u3089\u306e\u5bfe\u51e6\u6cd5\u3092\u6d3b\u7528\u3059\u308b\u3053\u3068\u3067\u3001\u591a\u304f\u306e\u4e00\u822c\u7684\u306aNokogiri\u306e\u30c8\u30e9\u30d6\u30eb\u3092\u89e3\u6c7a\u3067\u304d\u307e\u3059\u3002\u554f\u984c\u304c\u767a\u751f\u3057\u305f\u5834\u5408\u306f\u3001\u307e\u305aHTML\u306e\u69cb\u9020\u3068\u30a8\u30f3\u30b3\u30fc\u30c7\u30a3\u30f3\u30b0\u3092\u78ba\u8a8d\u3057\u3001\u305d\u306e\u5f8c\u3067\u9069\u5207\u306a\u5bfe\u51e6\u6cd5\u3092\u9078\u629e\u3059\u308b\u3053\u3068\u3092\u304a\u52e7\u3081\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"i-23\">\u5b9f\u8df5\u7684\u306a\u30e6\u30fc\u30b9\u30b1\u30fc\u30b9\u96c6<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-24\">\u30cb\u30e5\u30fc\u30b9\u30b5\u30a4\u30c8\u306e\u8a18\u4e8b\u60c5\u5831\u53d6\u5f97<\/h3>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>\u30cb\u30e5\u30fc\u30b9\u8a18\u4e8b\u30b9\u30af\u30ec\u30a4\u30d1\u30fc\u306e\u5b9f\u88c5<\/strong><\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   class NewsArticleScraper\n     def initialize\n       @headers = {\n         'User-Agent' =&gt; 'NewsBot\/1.0 (contact@example.com)'\n       }\n     end\n\n     def scrape_article(url)\n       doc = Nokogiri::HTML(URI.open(url, @headers))\n\n       {\n         title: extract_title(doc),\n         publish_date: extract_date(doc),\n         author: extract_author(doc),\n         content: extract_content(doc),\n         categories: extract_categories(doc)\n       }\n     end\n\n     private\n\n     def extract_title(doc)\n       # \u4e00\u822c\u7684\u306a\u30cb\u30e5\u30fc\u30b9\u30b5\u30a4\u30c8\u306e\u30bf\u30a4\u30c8\u30eb\u8981\u7d20\u30d1\u30bf\u30fc\u30f3\n       doc.at_css('h1.article-title, .entry-title, 
[itemprop=\"headline\"]')&amp;.text&amp;.strip\n     end\n\n     def extract_date(doc)\n       # \u65e5\u4ed8\u306e\u62bd\u51fa\u3068\u89e3\u6790\n       date_text = doc.at_css('time, .date, [itemprop=\"datePublished\"]')&amp;.[]('datetime') ||\n                  doc.at_css('time, .date, [itemprop=\"datePublished\"]')&amp;.text\n\n       return nil unless date_text\n       DateTime.parse(date_text) rescue nil\n     end\n\n     def extract_author(doc)\n       doc.at_css('[itemprop=\"author\"], .author-name, .writer')&amp;.text&amp;.strip\n     end\n\n     def extract_content(doc)\n       # \u8a18\u4e8b\u672c\u6587\u306e\u62bd\u51fa\uff08\u5e83\u544a\u3084\u95a2\u9023\u8a18\u4e8b\u3092\u9664\u5916\uff09\n       main_content = doc.css('.article-body p, .entry-content p').map(&amp;:text).join(\"\\n\\n\")\n       main_content.gsub(\/\\n{3,}\/, \"\\n\\n\").strip\n     end\n\n     def extract_categories(doc)\n       doc.css('.category, .tags a').map(&amp;:text).map(&amp;:strip).uniq\n     end\n   end\n\n   # \u4f7f\u7528\u4f8b\n   scraper = NewsArticleScraper.new\n   article = scraper.scrape_article('https:\/\/example.com\/news\/123')<\/pre>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-25\">E\u30b3\u30de\u30fc\u30b9\u30b5\u30a4\u30c8\u306e\u5546\u54c1\u30c7\u30fc\u30bf\u53ce\u96c6<\/h3>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>\u5546\u54c1\u60c5\u5831\u30b9\u30af\u30ec\u30a4\u30d1\u30fc\u306e\u5b9f\u88c5<\/strong><\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   class ProductScraper\n     def initialize\n       @rate_limiter = RateLimiter.new(requests_per_minute: 20)\n     end\n\n     def scrape_product_listing(url)\n       @rate_limiter.throttle do\n         doc = Nokogiri::HTML(URI.open(url))\n\n         products = doc.css('.product-item').map do 
|item|\n           {\n             name: extract_product_name(item),\n             price: extract_price(item),\n             availability: extract_availability(item),\n             specifications: extract_specifications(item),\n             image_url: extract_image_url(item)\n           }\n         end\n\n         {\n           products: products,\n           next_page: extract_next_page(doc)\n         }\n       end\n     end\n\n     private\n\n     def extract_product_name(item)\n       item.at_css('.product-name, h2')&amp;.text&amp;.strip\n     end\n\n     def extract_price(item)\n       price_text = item.at_css('.price, [itemprop=\"price\"]')&amp;.text&amp;.strip\n       return nil unless price_text\n\n       # \u4fa1\u683c\u306e\u30af\u30ea\u30fc\u30cb\u30f3\u30b0\n       price_text.gsub(\/[^\\d]\/, '').to_i\n     end\n\n     def extract_availability(item)\n       status = item.at_css('.stock-status, .availability')&amp;.text&amp;.strip\n       case status\n       when \/\u5728\u5eab\u3042\u308a|\u5728\u5eab\u6709\u308a\/\n         :in_stock\n       when \/\u6b8b\u308a\u308f\u305a\u304b\/\n         :limited_stock\n       else\n         :out_of_stock\n       end\n     end\n\n     def extract_specifications(item)\n       item.css('.specifications li, .specs tr').each_with_object({}) do |spec, hash|\n         key = spec.at_css('.label, th')&amp;.text&amp;.strip\n         value = spec.at_css('.value, td')&amp;.text&amp;.strip\n         hash[key] = value if key &amp;&amp; value\n       end\n     end\n\n     def extract_image_url(item)\n       item.at_css('img.product-image')&amp;.[]('src')\n     end\n\n     def extract_next_page(doc)\n       next_link = doc.at_css('.pagination .next a')\n       next_link&amp;.[]('href')\n     end\n   end<\/pre>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"i-26\">SNS\u30d7\u30ed\u30d5\u30a3\u30fc\u30eb\u60c5\u5831\u306e\u62bd\u51fa<\/h3>\n\n\n\n<ol 
class=\"wp-block-list\">\n<li><strong>\u30d7\u30ed\u30d5\u30a3\u30fc\u30eb\u30b9\u30af\u30ec\u30a4\u30d1\u30fc\u306e\u5b9f\u88c5<\/strong><\/li>\n<\/ol>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">   class ProfileScraper\n     def initialize\n       @cache = {}\n     end\n\n     def scrape_profile(url)\n       return @cache[url] if @cache[url]\n\n       doc = Nokogiri::HTML(URI.open(url))\n\n       profile = {\n         username: extract_username(doc),\n         bio: extract_bio(doc),\n         followers: extract_followers(doc),\n         following: extract_following(doc),\n         posts: extract_posts(doc),\n         verified: is_verified?(doc)\n       }\n\n       @cache[url] = profile\n       profile\n     end\n\n     private\n\n     def extract_username(doc)\n       doc.at_css('.profile-username, .user-name')&amp;.text&amp;.strip\n     end\n\n     def extract_bio(doc)\n       doc.at_css('.bio, .profile-description')&amp;.text&amp;.strip\n     end\n\n     def extract_followers(doc)\n       count_text = doc.at_css('.followers-count')&amp;.text&amp;.strip\n       parse_count(count_text)\n     end\n\n     def extract_following(doc)\n       count_text = doc.at_css('.following-count')&amp;.text&amp;.strip\n       parse_count(count_text)\n     end\n\n     def extract_posts(doc)\n       posts = doc.css('.post-item').map do |post|\n         {\n           content: post.at_css('.post-content')&amp;.text&amp;.strip,\n           timestamp: parse_timestamp(post.at_css('.timestamp')&amp;.text),\n           likes: parse_count(post.at_css('.likes-count')&amp;.text)\n         }\n       end\n\n       posts.compact\n     end\n\n     def is_verified?(doc)\n       !!doc.at_css('.verified-badge, .verified-icon')\n     end\n\n     private\n\n     def parse_count(text)\n       return 0 
unless text\n\n       case text.downcase\n       when \/k$\/\n         (text.to_f * 1000).to_i\n       when \/m$\/\n         (text.to_f * 1_000_000).to_i\n       else\n         text.gsub(\/[^\\d]\/, '').to_i\n       end\n     end\n\n     def parse_timestamp(text)\n       return nil unless text\n\n       begin\n         case text\n         when \/(\\d+)\u79d2\u524d\/\n           Time.now - $1.to_i\n         when \/(\\d+)\u5206\u524d\/\n           Time.now - ($1.to_i * 60)\n         when \/(\\d+)\u6642\u9593\u524d\/\n           Time.now - ($1.to_i * 3600)\n         when \/(\\d+)\u65e5\u524d\/\n           Time.now - ($1.to_i * 86400)\n         else\n           Time.parse(text)\n         end\n       rescue\n         nil\n       end\n     end\n   end<\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u3053\u308c\u3089\u306e\u5b9f\u88c5\u4f8b\u306f\u3001\u5b9f\u969b\u306e\u30d7\u30ed\u30b8\u30a7\u30af\u30c8\u3067\u3059\u3050\u306b\u6d3b\u7528\u3067\u304d\u308b\u5b9f\u8df5\u7684\u306a\u30b3\u30fc\u30c9\u3067\u3059\u3002\u305f\u3060\u3057\u3001\u5b9f\u969b\u306e\u4f7f\u7528\u6642\u306b\u306f\u4ee5\u4e0b\u306e\u70b9\u306b\u6ce8\u610f\u3057\u3066\u304f\u3060\u3055\u3044\uff1a<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li>\u30b5\u30a4\u30c8\u306e\u5229\u7528\u898f\u7d04\u3068robots.txt\u3092\u5fc5\u305a\u78ba\u8a8d\u3059\u308b<\/li>\n\n\n\n<li>\u9069\u5207\u306a\u30ec\u30fc\u30c8\u5236\u9650\u3092\u8a2d\u5b9a\u3059\u308b<\/li>\n\n\n\n<li>\u30a8\u30e9\u30fc\u51e6\u7406\u3092\u5b9f\u88c5\u3059\u308b<\/li>\n\n\n\n<li>\u30ad\u30e3\u30c3\u30b7\u30e5\u6226\u7565\u3092\u691c\u8a0e\u3059\u308b<\/li>\n\n\n\n<li>\u5bfe\u8c61\u30b5\u30a4\u30c8\u306e\u69cb\u9020\u5909\u66f4\u306b\u5bfe\u5fdc\u3067\u304d\u308b\u3088\u3046\u306b\u8a2d\u8a08\u3059\u308b<\/li>\n<\/ol>\n\n\n\n<p 
class=\"wp-block-paragraph\">\u3053\u308c\u3089\u306e\u30b3\u30fc\u30c9\u306f\u57fa\u672c\u7684\u306a\u5b9f\u88c5\u4f8b\u3067\u3042\u308a\u3001\u5b9f\u969b\u306e\u30d7\u30ed\u30b8\u30a7\u30af\u30c8\u3067\u306f\u8981\u4ef6\u306b\u5fdc\u3058\u3066\u9069\u5207\u306b\u30ab\u30b9\u30bf\u30de\u30a4\u30ba\u3059\u308b\u3053\u3068\u3092\u304a\u52e7\u3081\u3057\u307e\u3059\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>Warning: Undefined array key &#8220;is_admin&#8221; in \/home\/xs392991\/dexall.co.jp\/public_html\/articles\/wp-content\/themes\/ &#8230; <\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"closed","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[3],"tags":[],"class_list":["post-1227","post","type-post","status-publish","format-standard","category-ruby","nothumb"],"_links":{"self":[{"href":"https:\/\/dexall.co.jp\/articles\/index.php?rest_route=\/wp\/v2\/posts\/1227","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/dexall.co.jp\/articles\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/dexall.co.jp\/articles\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/dexall.co.jp\/articles\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/dexall.co.jp\/articles\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=1227"}],"version-history":[{"count":1,"href":"https:\/\/dexall.co.jp\/articles\/index.php?rest_route=\/wp\/v2\/posts\/1227\/revisions"}],"predecessor-version":[{"id":1228,"href":"https:\/\/dexall.co.jp\/articles\/index.php?rest_route=\/wp\/v2\/posts\/1227\/revisions\/1228"}],"wp:attachment":[{"href":"https:\/\/dexall.co.jp\/articles\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=1227"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/dexall.co.jp\/articles\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=1227"},{"taxonomy":"p
ost_tag","embeddable":true,"href":"https:\/\/dexall.co.jp\/articles\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=1227"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}