From ba4cf28d24a29b8eb191f6481e52a68d0a443d2a Mon Sep 17 00:00:00 2001
From: JJ
Date: Thu, 25 Jul 2024 12:22:30 -0700
Subject: update robots.txt

---
 robots.txt | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/robots.txt b/robots.txt
index 51a87af..d99652e 100644
--- a/robots.txt
+++ b/robots.txt
@@ -4,29 +4,31 @@
 Disallow: /chronology/
 
 # See https://seirdy.one/robots.txt
+## Ad Scrapers
 User-agent: Adsbot
-Disallow: /
-
 User-agent: BLEXBot
+User-agent: peer39_crawler
+User-agent: peer39_crawler/1.0
 Disallow: /
 
+## IP (not that IP) Scrapers
 User-agent: BrandVerity/1.0
-Disallow: /
-
 User-agent: CheckMarkNetwork/1.0
-Disallow: /
-
+User-agent: CheckMarkNetwork/1.0 (+https://www.checkmarknetwork.com/spider.html)
 User-agent: NPBot
-Disallow: /
-
+User-agent: PiplBot
 User-agent: SlySearch
-Disallow: /
-
 User-agent: TurnitinBot
 Disallow: /
 
+## LLM Scrapers
 User-agent: ChatGPT-User
+User-agent: GPTBot
+User-agent: ClaudeBot
 Disallow: /
 
-User-agent: GPTBot
+## (More) LLM Scrapers
+User-agent: Applebot-Extended
+User-agent: Google-Extended
+User-agent: FacebookBot
 Disallow: /
-- 
cgit v1.2.3-70-g09d2