# As a condition of accessing this website, you agree to abide by the following
# content signals:

# (a)  If a Content-Signal = yes, you may collect content for the corresponding
#      use.
# (b)  If a Content-Signal = no, you may not collect content for the
#      corresponding use.
# (c)  If the website operator does not include a Content-Signal for a
#      corresponding use, the website operator neither grants nor restricts
#      permission via Content-Signal with respect to the corresponding use.

# The content signals and their meanings are:

# search:   building a search index and providing search results (e.g., returning
#           hyperlinks and short excerpts from your website's contents). Search does not
#           include providing AI-generated search summaries.
# ai-input: inputting content into one or more AI models (e.g., retrieval
#           augmented generation, grounding, or other real-time taking of content for
#           generative AI search answers).
# ai-train: training or fine-tuning AI models.

# ANY RESTRICTIONS EXPRESSED VIA CONTENT SIGNALS ARE EXPRESS RESERVATIONS OF
# RIGHTS UNDER ARTICLE 4 OF THE EUROPEAN UNION DIRECTIVE 2019/790 ON COPYRIGHT
# AND RELATED RIGHTS IN THE DIGITAL SINGLE MARKET.

# BEGIN Cloudflare Managed content

# Wildcard group: applies to any crawler that has no more specific group
# in this file. It places no path restrictions and, per the signal
# definitions above, permits search indexing and refuses AI training.
# ai-input is not listed, so it is neither granted nor restricted here.
User-agent: *
Content-Signal: search=yes,ai-train=no
Allow: /

# Each crawler named below is denied the entire site.
# NOTE(review): GPTBot, ClaudeBot, CCBot and Google-Extended are declared
# again further down this file with different rules (/collection/ and
# /api/ allowed, /assets/ disallowed). RFC 9309 tells crawlers to combine
# groups that name the same user agent, but not every parser does so, so
# the effective policy for these four may be parser-dependent — confirm
# which policy is intended.
User-agent: Amazonbot
Disallow: /

User-agent: Applebot-Extended
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: ClaudeBot
Disallow: /

User-agent: CloudflareBrowserRenderingCrawler
Disallow: /

User-agent: Google-Extended
Disallow: /

User-agent: GPTBot
Disallow: /

User-agent: meta-externalagent
Disallow: /

# END Cloudflare Managed Content

# Atrium Digital Collection · robots.txt
# Generated at build time. Reviewed by the curator team; questions to
# curator@museum.example.
#
# TL;DR
#   - Human visitors and classic search engines: welcome, crawl freely.
#   - AI training crawlers: metadata is fair use, imagery is not. Slow
#     down to a request every few seconds — we'll enforce if you don't.
#   - Mass-download tooling: please don't.
#
# This file is advisory. The Cloudflare edge enforces the same policy
# for crawlers that choose to ignore it.

# ---------------------------------------------------------------------
# Traditional search engines: the whole site may be crawled and no
# Crawl-delay is requested. The three crawlers share a single group
# because they receive exactly the same rule.
# ---------------------------------------------------------------------
User-agent: Googlebot
User-agent: bingbot
User-agent: DuckDuckBot
Allow: /

# ---------------------------------------------------------------------
# AI crawlers: metadata may be crawled, imagery should be skipped, and
# the request rate must stay sustainable. The edge rate-limits these
# crawlers whether or not they honour this file.
#
# Every group below carries the same three path rules; the groups are
# split only by the Crawl-delay they request (6, 10, or none).
#
# NOTE(review): GPTBot, ClaudeBot, CCBot and Google-Extended are also
# listed in the Cloudflare-managed section at the top of this file with
# "Disallow: /". Parsers that combine same-agent groups (RFC 9309) will
# see both rule sets together; parsers that stop at the first matching
# group will see only the full-site disallow. Confirm which outcome is
# intended.
# ---------------------------------------------------------------------

# Crawl-delay of 6.
User-agent: GPTBot
User-agent: ClaudeBot
User-agent: PerplexityBot
User-agent: anthropic-ai
Allow: /collection/
Allow: /api/
Disallow: /assets/
Crawl-delay: 6

# Crawl-delay of 10.
User-agent: CCBot
Allow: /collection/
Allow: /api/
Disallow: /assets/
Crawl-delay: 10

# No Crawl-delay requested.
User-agent: Google-Extended
Allow: /collection/
Allow: /api/
Disallow: /assets/

# ---------------------------------------------------------------------
# Generic scraping and mass-download tools: denied the entire site.
# Such tools seldom respect robots.txt, which is why the Cloudflare WAF
# custom rule "demo-art-collection-waf-scraper-block" enforces the same
# decision. All five agents share one group because the rule is
# identical for each.
# ---------------------------------------------------------------------
User-agent: python-requests
User-agent: Scrapy
User-agent: curl
User-agent: wget
User-agent: HTTrack
Disallow: /

# ---------------------------------------------------------------------
# Fallback group: applies to any crawler that has no group of its own in
# this file. No path is restricted.
# NOTE(review): a "User-agent: *" group with the same Allow rule also
# appears in the Cloudflare-managed section at the top of this file;
# this one repeats it.
# ---------------------------------------------------------------------
User-agent: *
Allow: /

# Location of the XML sitemap. This line stands outside any User-agent
# group.
Sitemap: https://art-collection.ent.alphams.work/sitemap.xml