From af2cadbda9075ecbe829d36edf173935d27697f6 Mon Sep 17 00:00:00 2001
From: Kevin Fenzi
Date: Wed, 12 Mar 2025 10:07:07 -0700
Subject: [PATCH] wiki: 403 a list of bots

Bots are hitting the wiki pretty hard and we don't particularly care
about indexing it anymore, as most real docs should have moved to
docs.fedoraproject.org. Also, many of these bots ignore robots.txt or do
other things we don't want.

Requests whose User-Agent matches the list are refused with
403 Forbidden (mod_rewrite's [F] flag).

Signed-off-by: Kevin Fenzi
---
 .../reverseproxy/templates/reversepassproxy.fp-wiki.conf | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/roles/httpd/reverseproxy/templates/reversepassproxy.fp-wiki.conf b/roles/httpd/reverseproxy/templates/reversepassproxy.fp-wiki.conf
index a59d73c160..4209ac7f22 100644
--- a/roles/httpd/reverseproxy/templates/reversepassproxy.fp-wiki.conf
+++ b/roles/httpd/reverseproxy/templates/reversepassproxy.fp-wiki.conf
@@ -8,6 +8,11 @@
 RewriteCond %{HTTPS} off
 RewriteCond %{QUERY_STRING} action= [NC]
 RewriteRule .* https://%{HTTP_HOST}%{REQUEST_URI} [R=301,NE,L]
+# Spiders-gone-wild
+# These spiders may not follow robots.txt and will
+# hit admin sections which consume large amounts of CPU
+RewriteCond %{HTTP_USER_AGENT} ^.*(Bytespider|ClaudeBot|Amazonbot|YandexBot|ChatGLM-Spider|GPTBot|Barkrowler|YisouSpider|MJ12bot|SemrushBot).*$ [NC]
+RewriteRule .* - [F,L]
 
 # /wiki must come before /w due to prefix matching.
 ProxyPass {{wikipath}} {{proxyurl}}{{wikipath}}