# robots.txt
# https://www.robotstxt.org/robotstxt.html
# Default group: these rules apply to every crawler that is not matched by a
# more specific User-agent group elsewhere in this file.
# Pattern syntax: "*" matches any sequence of characters and a trailing "$"
# anchors the pattern to the end of the URL.
User-agent: *
# Pages nested under two leading path segments.
# NOTE(review): these look like course/lesson sub-pages — confirm with the site owners.
Disallow: /*/*/l-*/ler$
Disallow: /*/*/l-*/sum$
Disallow: /*/*/placement-*
Disallow: /*/*/final-*
# User account pages (profile, password, feedback status, registration,
# e-mail change, login) under any single leading path segment.
Disallow: /*/user/profile$
Disallow: /*/user/password/change$
Disallow: /*/user/password/set$
Disallow: /*/user/feedback/status/*
Disallow: /*/user/register/confirm$
Disallow: /*/user/email/change$
Disallow: /*/user/login$
# DW content is made available for your personal, non-commercial use. Use of any device,
# tool, or process designed to data mine or scrape the content using automated means is
# prohibited without prior written permission from Deutsche Welle.
# Disallowed Bots
# Each crawler below is denied access to the whole site ("Disallow: /").
# Records are separated by blank lines, as the original robots.txt standard
# requires, so that strict parsers do not merge neighbouring records.

User-agent: AhrefsBot
Disallow: /

User-agent: Ai2Bot
Disallow: /

User-agent: Ai2Bot-Dolma
Disallow: /

User-agent: Amazonbot
Disallow: /

User-agent: anthropic-ai
Disallow: /

User-agent: Applebot
Disallow: /

User-agent: Applebot-Extended
Disallow: /

User-agent: AwarioRssBot
Disallow: /

User-agent: AwarioSmartBot
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: ChatGPT-User
Disallow: /

User-agent: ClaudeBot
Disallow: /

User-agent: Claude-Web
Disallow: /

User-agent: cohere-ai
Disallow: /

User-agent: DataForSeoBot
Disallow: /

User-agent: Diffbot
Disallow: /

User-agent: dotbot
Disallow: /

User-agent: FacebookBot
Disallow: /

User-agent: FriendlyCrawler
Disallow: /

User-agent: Google-Extended
Disallow: /

User-agent: GPTBot
Disallow: /

User-agent: ia_archiver
Disallow: /

User-agent: ImagesiftBot
Disallow: /

User-agent: img2dataset
Disallow: /

User-agent: magpie-crawler
Disallow: /

User-agent: Meta-ExternalAgent
Disallow: /

User-agent: Meta-ExternalFetcher
Disallow: /

User-agent: MJ12bot
Disallow: /

User-agent: NewsNow
Disallow: /

User-agent: news-please
Disallow: /

User-agent: OAI-SearchBot
Disallow: /

User-agent: omgili
Disallow: /

User-agent: omgilibot
Disallow: /

User-agent: peer39_crawler
Disallow: /

# NOTE(review): crawlers match on the product token only, so this versioned
# entry is probably redundant with the one above — kept to preserve behavior.
User-agent: peer39_crawler/1.0
Disallow: /

User-agent: PerplexityBot
Disallow: /

User-agent: PetalBot
Disallow: /

User-agent: Scrapy
Disallow: /

User-agent: SemrushBot
Disallow: /

User-agent: SirdataBot
Disallow: /

User-agent: Timpibot
Disallow: /

User-agent: TurnitinBot
Disallow: /

User-agent: VelenPublicWebCrawler
Disallow: /

User-agent: Webzio-Extended
Disallow: /

User-agent: Yeti
Disallow: /

User-agent: YouBot
Disallow: /
# Other Bot Rules
# Crawlers listed here get their own group, so the "User-agent: *" rules do
# not apply to them.

# NOTE(review): this group has no Disallow line, so the Allow below does not
# restrict anything — confirm that full access for Twitterbot is intended.
User-agent: Twitterbot
Allow: /*?maca=*

# Facebook link sharing preview
User-agent: facebookexternalhit
Allow: /

# Sitemap location; this directive is independent of the User-agent groups.
Sitemap: https://learngerman.dw.com/sitemap.xml
# (Nice to have)