From 6ccea391ed5e05cdd65b78fc1c7ada40f22fffe3 Mon Sep 17 00:00:00 2001
From: Edward Loveall
Date: Mon, 15 Jan 2024 14:55:14 -0500
Subject: [PATCH] Add a bunch of well-known, LLM scrapers to robots.txt

Unknown if this will actually stop them, but at least I can show my intent.

User agents sourced from https://darkvisitors.com/
---
 CHANGELOG         |  1 +
 public/robots.txt | 59 +++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 56 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index 587e22f..b4987cf 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,5 +1,6 @@
 Unreleased
 
+* Add a bunch of well-known, LLM scrapers to robots.txt
 * Add command to tag releases
 * Modernize nix config
 * Added scribe.manasiwibi.com instance
diff --git a/public/robots.txt b/public/robots.txt
index 1200905..228a82c 100644
--- a/public/robots.txt
+++ b/public/robots.txt
@@ -1,4 +1,55 @@
-# Learn more about robots.txt: https://www.robotstxt.org/robotstxt.html
-User-agent: *
-# 'Disallow' with an empty value allows all paths to be crawled
-Disallow:
+# ChatGPT-User
+User-agent: ChatGPT-User
+Disallow: /
+
+# cohere-ai
+User-agent: cohere-ai
+Disallow: /
+
+# anthropic-ai
+User-agent: anthropic-ai
+Disallow: /
+
+# Bytespider
+User-agent: Bytespider
+Disallow: /
+
+# CCBot
+User-agent: CCBot
+Disallow: /
+
+# FacebookBot
+User-agent: FacebookBot
+Disallow: /
+
+# Google-Extended
+User-agent: Google-Extended
+Disallow: /
+
+# GPTBot
+User-agent: GPTBot
+Disallow: /
+
+# omgili
+User-agent: omgili
+Disallow: /
+
+# Amazonbot
+User-agent: Amazonbot
+Disallow: /
+
+# Applebot
+User-agent: Applebot
+Disallow: /
+
+# PerplexityBot
+User-agent: PerplexityBot
+Disallow: /
+
+# PerplexityBot
+User-agent: PerplexityBot
+Disallow: /
+
+# YouBot
+User-agent: YouBot
+Disallow: /