From 91e9a5627d018af9439b0801f46bbc99ed308e57 Mon Sep 17 00:00:00 2001 From: Kevin Fenzi Date: Tue, 13 May 2025 11:39:25 -0700 Subject: [PATCH] httpd / botblocking: fix syntax on bot rewrite These patterns have to be in double quotes in order to do a string comparison; since they were not, they were never matching anything. ;( Fix them all up, and also block a few more repos on pagure that are getting heavily crawled. Signed-off-by: Kevin Fenzi --- .../httpd/dl.fedoraproject.org/rewrite.conf | 2 +- .../templates/reversepassproxy.fp-wiki.conf | 2 +- roles/mailman3/templates/mailmanweb.conf.j2 | 2 +- .../templates/httpd/nagios.conf.j2 | 2 +- roles/pagure/templates/0_pagure.conf | 26 ++++++++++++++++++- 5 files changed, 29 insertions(+), 5 deletions(-) diff --git a/roles/download/files/httpd/dl.fedoraproject.org/rewrite.conf b/roles/download/files/httpd/dl.fedoraproject.org/rewrite.conf index 6ca112f2f6..3239a7773c 100644 --- a/roles/download/files/httpd/dl.fedoraproject.org/rewrite.conf +++ b/roles/download/files/httpd/dl.fedoraproject.org/rewrite.conf @@ -6,7 +6,7 @@ RewriteRule ^.*$ https://fedoraproject.org/wiki/Infrastructure/Mirroring#Tools_t # Spiders-gone-wild # These spiders may not follow robots.txt and will # hit admin sections which consume large amounts of CPU -RewriteCond %{HTTP_USER_AGENT} ^.*(Bytespider|ClaudeBot|Amazonbot|YandexBot|ChatGLM-Spider|GPTBot|Barkrowler|YisouSpider|MJ12bot).*$ [NC] +RewriteCond %{HTTP_USER_AGENT} "(Bytespider|ClaudeBot|Amazonbot|YandexBot|ChatGLM-Spider|GPTBot|Barkrowler|YisouSpider|MJ12bot)" [NC] RewriteRule .* - [F,L] RewriteRule ^/$ /pub [R=302,L] diff --git a/roles/httpd/reverseproxy/templates/reversepassproxy.fp-wiki.conf b/roles/httpd/reverseproxy/templates/reversepassproxy.fp-wiki.conf index 4209ac7f22..673a6d4e78 100644 --- a/roles/httpd/reverseproxy/templates/reversepassproxy.fp-wiki.conf +++ b/roles/httpd/reverseproxy/templates/reversepassproxy.fp-wiki.conf @@ -11,7 +11,7 @@ RewriteRule .* https://%{HTTP_HOST}%{REQUEST_URI} 
[R=301,NE,L] # Spiders-gone-wild # These spiders may not follow robots.txt and will # hit admin sections which consume large amounts of CPU -RewriteCond %{HTTP_USER_AGENT} ^.*(Bytespider|ClaudeBot|Amazonbot|YandexBot|ChatGLM-Spider|GPTBot|Barkrowler|YisouSpider|MJ12bot|SemrushBot).*$ [NC] +RewriteCond %{HTTP_USER_AGENT} "(Bytespider|ClaudeBot|Amazonbot|YandexBot|ChatGLM-Spider|GPTBot|Barkrowler|YisouSpider|MJ12bot|SemrushBot)" [NC] RewriteRule .* - [F,L] # /wiki must come before /w due to prefix matching. diff --git a/roles/mailman3/templates/mailmanweb.conf.j2 b/roles/mailman3/templates/mailmanweb.conf.j2 index f877bfb429..c6260729d0 100644 --- a/roles/mailman3/templates/mailmanweb.conf.j2 +++ b/roles/mailman3/templates/mailmanweb.conf.j2 @@ -43,7 +43,7 @@ RewriteRule ^/$ /archives [R,L] # Spiders-gone-wild # These spiders may not follow robots.txt and will # hit admin sections which consume large amounts of CPU -RewriteCond %{HTTP_USER_AGENT} ^.*(Bytespider|ClaudeBot|Amazonbot|YandexBot|ChatGLM-Spider|GPTBot|Barkrowler|YisouSpider|MJ12bot).*$ [NC] +RewriteCond %{HTTP_USER_AGENT} "(Bytespider|ClaudeBot|Amazonbot|YandexBot|ChatGLM-Spider|GPTBot|Barkrowler|YisouSpider|MJ12bot)" [NC] RewriteRule .* - [F,L] # Old static archives diff --git a/roles/nagios_server/templates/httpd/nagios.conf.j2 b/roles/nagios_server/templates/httpd/nagios.conf.j2 index ec9a9a39f0..1271b1b972 100644 --- a/roles/nagios_server/templates/httpd/nagios.conf.j2 +++ b/roles/nagios_server/templates/httpd/nagios.conf.j2 @@ -54,7 +54,7 @@ RewriteRule ^/$ /nagios/ [R] # Spiders-gone-wild # These spiders may not follow robots.txt and will # hit admin sections which consume large amounts of CPU -RewriteCond %{HTTP_USER_AGENT} ^.*(Bytespider|ClaudeBot|Amazonbot|YandexBot|ChatGLM-Spider|GPTBot|Barkrowler|YisouSpider|MJ12bot).*$ [NC] +RewriteCond %{HTTP_USER_AGENT} "(Bytespider|ClaudeBot|Amazonbot|YandexBot|ChatGLM-Spider|GPTBot|Barkrowler|YisouSpider|MJ12bot)" [NC] RewriteRule .* - [F,L] Alias /nagios 
/usr/share/nagios/html/ diff --git a/roles/pagure/templates/0_pagure.conf b/roles/pagure/templates/0_pagure.conf index 7e4564252b..ab47b7c4a3 100644 --- a/roles/pagure/templates/0_pagure.conf +++ b/roles/pagure/templates/0_pagure.conf @@ -152,10 +152,34 @@ MaxConnectionsPerChild 1000 RewriteCond %{REQUEST_URI} ^/binutils-gdb/.*$ RewriteRule .* - [F] + RewriteCond %{REQUEST_URI} ^/releng/fedora-scm-requests/blob/.*$ + RewriteRule .* - [F] + + RewriteCond %{REQUEST_URI} ^/releng/fedora-scm-requests/raw/.*$ + RewriteRule .* - [F] + + RewriteCond %{REQUEST_URI} ^/fork/sergiomb/releng/.*$ + RewriteRule .* - [F] + + RewriteCond %{REQUEST_URI} ^/fork/.*/pagure/.*$ + RewriteRule .* - [F] + + RewriteCond %{REQUEST_URI} ^/fork/.*/389-base/.*$ + RewriteRule .* - [F] + + RewriteCond %{REQUEST_URI} ^/llvm-project-mirror/.*$ + RewriteRule .* - [F] + + RewriteCond %{REQUEST_URI} ^/gfwlist/raw/master.*$ + RewriteRule .* - [F] + + RewriteCond %{REQUEST_URI} ^/fork/mohanboddu/fedora-infra/ansible.*$ + RewriteRule .* - [F] + # Spiders-gone-wild # These spiders may not follow robots.txt and will # hit admin sections which consume large amounts of CPU - RewriteCond %{HTTP_USER_AGENT} ^.*(Bytespider|ClaudeBot|Amazonbot|YandexBot|ChatGLM-Spider|GPTBot|Barkrowler|YisouSpider|MJ12bot).*$ [NC] + RewriteCond %{HTTP_USER_AGENT} "(Bytespider|ClaudeBot|Amazonbot|YandexBot|ChatGLM-Spider|GPTBot|Barkrowler|YisouSpider|MJ12bot)" [NC] RewriteRule .* - [F,L]