httpd / botblocking: fix syntax on bot rewrite

These patterns have to be enclosed in double quotes in order to do a string
comparison. Since they were not, they were never matching anything. ;(

Fix them all up, and also block a few more repos on pagure that are
getting heavily crawled.

Signed-off-by: Kevin Fenzi <kevin@scrye.com>
This commit is contained in:
Kevin Fenzi 2025-05-13 11:39:25 -07:00
parent 2db0be9ae8
commit 91e9a5627d
5 changed files with 29 additions and 5 deletions

View file

@ -6,7 +6,7 @@ RewriteRule ^.*$ https://fedoraproject.org/wiki/Infrastructure/Mirroring#Tools_t
# Spiders-gone-wild
# These spiders may not follow robots.txt and will
# hit admin sections which consume large amounts of CPU
RewriteCond %{HTTP_USER_AGENT} ^.*(Bytespider|ClaudeBot|Amazonbot|YandexBot|ChatGLM-Spider|GPTBot|Barkrowler|YisouSpider|MJ12bot).*$ [NC]
RewriteCond %{HTTP_USER_AGENT} "(Bytespider|ClaudeBot|Amazonbot|YandexBot|ChatGLM-Spider|GPTBot|Barkrowler|YisouSpider|MJ12bot)" [NC]
RewriteRule .* - [F,L]
RewriteRule ^/$ /pub [R=302,L]

View file

@ -11,7 +11,7 @@ RewriteRule .* https://%{HTTP_HOST}%{REQUEST_URI} [R=301,NE,L]
# Spiders-gone-wild
# These spiders may not follow robots.txt and will
# hit admin sections which consume large amounts of CPU
RewriteCond %{HTTP_USER_AGENT} ^.*(Bytespider|ClaudeBot|Amazonbot|YandexBot|ChatGLM-Spider|GPTBot|Barkrowler|YisouSpider|MJ12bot|SemrushBot).*$ [NC]
RewriteCond %{HTTP_USER_AGENT} "(Bytespider|ClaudeBot|Amazonbot|YandexBot|ChatGLM-Spider|GPTBot|Barkrowler|YisouSpider|MJ12bot)" [NC]
RewriteRule .* - [F,L]
# /wiki must come before /w due to prefix matching.

View file

@ -43,7 +43,7 @@ RewriteRule ^/$ /archives [R,L]
# Spiders-gone-wild
# These spiders may not follow robots.txt and will
# hit admin sections which consume large amounts of CPU
RewriteCond %{HTTP_USER_AGENT} ^.*(Bytespider|ClaudeBot|Amazonbot|YandexBot|ChatGLM-Spider|GPTBot|Barkrowler|YisouSpider|MJ12bot).*$ [NC]
RewriteCond %{HTTP_USER_AGENT} "(Bytespider|ClaudeBot|Amazonbot|YandexBot|ChatGLM-Spider|GPTBot|Barkrowler|YisouSpider|MJ12bot)" [NC]
RewriteRule .* - [F,L]
# Old static archives

View file

@ -54,7 +54,7 @@ RewriteRule ^/$ /nagios/ [R]
# Spiders-gone-wild
# These spiders may not follow robots.txt and will
# hit admin sections which consume large amounts of CPU
RewriteCond %{HTTP_USER_AGENT} ^.*(Bytespider|ClaudeBot|Amazonbot|YandexBot|ChatGLM-Spider|GPTBot|Barkrowler|YisouSpider|MJ12bot).*$ [NC]
RewriteCond %{HTTP_USER_AGENT} "(Bytespider|ClaudeBot|Amazonbot|YandexBot|ChatGLM-Spider|GPTBot|Barkrowler|YisouSpider|MJ12bot)" [NC]
RewriteRule .* - [F,L]
Alias /nagios /usr/share/nagios/html/

View file

@ -152,10 +152,34 @@ MaxConnectionsPerChild 1000
RewriteCond %{REQUEST_URI} ^/binutils-gdb/.*$
RewriteRule .* - [F]
RewriteCond %{REQUEST_URI} ^/releng/fedora-scm-requests/blob/.*$
RewriteRule .* - [F]
RewriteCond %{REQUEST_URI} ^/releng/fedora-scm-requests/raw/.*$
RewriteRule .* - [F]
RewriteCond %{REQUEST_URI} ^/fork/sergiomb/releng/.*$
RewriteRule .* - [F]
RewriteCond %{REQUEST_URI} ^/fork/.*/pagure/.*$
RewriteRule .* - [F]
RewriteCond %{REQUEST_URI} ^/fork/.*/389-base/.*$
RewriteRule .* - [F]
RewriteCond %{REQUEST_URI} ^/llvm-project-mirror/.*$
RewriteRule .* - [F]
RewriteCond %{REQUEST_URI} ^/gfwlist/raw/master.*$
RewriteRule .* - [F]
RewriteCond %{REQUEST_URI} ^/fork/mohanboddu/fedora-infra/ansible.*$
RewriteRule .* - [F]
# Spiders-gone-wild
# These spiders may not follow robots.txt and will
# hit admin sections which consume large amounts of CPU
RewriteCond %{HTTP_USER_AGENT} ^.*(Bytespider|ClaudeBot|Amazonbot|YandexBot|ChatGLM-Spider|GPTBot|Barkrowler|YisouSpider|MJ12bot).*$ [NC]
RewriteCond %{HTTP_USER_AGENT} "(Bytespider|ClaudeBot|Amazonbot|YandexBot|ChatGLM-Spider|GPTBot|Barkrowler|YisouSpider|MJ12bot)" [NC]
RewriteRule .* - [F,L]
<Location /apache-status>