# ==============================
# robots.txt - SEO and AI Bot Control
# ==============================
# Deployed via Ansible for vm-nginx-1
#
# Format reminder: one "field: value" record per line; "#" starts a comment
# that runs to the end of the line. Each group starts with its User-agent line.
# Content-Signal lines state content-usage preferences (AI training, search
# indexing, AI input) alongside the crawl rules.

# AI/LLM Crawlers - Blocked (no SEO value, high resource usage)
User-agent: GPTBot
Content-Signal: ai-train=no, search=yes, ai-input=no
Disallow: /

User-agent: ChatGPT-User
Content-Signal: ai-train=no, search=yes, ai-input=no
Disallow: /

User-agent: Amazonbot
Content-Signal: ai-train=no, search=yes, ai-input=no
Disallow: /

User-agent: PetalBot
Content-Signal: ai-train=no, search=yes, ai-input=no
Disallow: /

User-agent: ClaudeBot
Content-Signal: ai-train=no, search=yes, ai-input=no
Disallow: /

User-agent: Claude-Web
Content-Signal: ai-train=no, search=yes, ai-input=no
Disallow: /

User-agent: anthropic-ai
Content-Signal: ai-train=no, search=yes, ai-input=no
Disallow: /

User-agent: cohere-ai
Content-Signal: ai-train=no, search=yes, ai-input=no
Disallow: /

User-agent: Bytespider
Content-Signal: ai-train=no, search=yes, ai-input=no
Disallow: /

User-agent: CCBot
Content-Signal: ai-train=no, search=yes, ai-input=no
Disallow: /

User-agent: omgili
Content-Signal: ai-train=no, search=yes, ai-input=no
Disallow: /

User-agent: PerplexityBot
Content-Signal: ai-train=no, search=yes, ai-input=no
Disallow: /

User-agent: YouBot
Content-Signal: ai-train=no, search=yes, ai-input=no
Disallow: /

# All other bots (Google, Bing, etc.) - Allow with restrictions
# NOTE: this group is deliberately kept free of blank lines (bare "#" lines are
# used as visual separators instead) because some parsers treat a blank line as
# the end of a group, which would detach the later Disallow rules.
User-agent: *
Content-Signal: ai-train=no, search=yes, ai-input=no
#
# Block pages with no unique content (duplicate content penalty)
Disallow: /startTopic/
Disallow: /discover/unread/
Disallow: /markallread/
Disallow: /staff/
Disallow: /cookies/
Disallow: /online/
Disallow: /discover/
Disallow: /leaderboard/
Disallow: /search/
Disallow: /tags/
Disallow: /*?advancedSearchForm=
Disallow: /register/
Disallow: /lostpassword/
Disallow: /login/
Disallow: /*currency=
#
# Block faceted pages and 301 redirect pages (infinite crawl risk)
Disallow: /*?sortby=
Disallow: /*?filter=
Disallow: /*?tab=
Disallow: /*?do=
Disallow: /*ref=
Disallow: /*?forumId*
Disallow: /*?&controller=embed
#
# infoclimat.fr
# TODO: Move these rules to a dedicated file?
Disallow: /previsions-meteo/atmogrammes/
Disallow: /observations-meteo/archives/
Disallow: /fr/cartes/observations-meteo/archives/
Disallow: /historic/getValues.php

# Note: Sitemap is managed by the application itself (e.g., /sitemap.php for forums)