# ==========================================================
# 🤖 robots.txt for www.argoldsmith.com
# Purpose: SEO-safe, AI-friendly, crawler-secure
# Last updated: 2025-07-21
#
# How this file is read (RFC 9309):
#   * One directive per line. "#" starts a comment that runs to the
#     end of the line, so directives must never share a line.
#   * A crawler obeys ONLY the most specific User-agent group that
#     names it. Named groups do NOT inherit from "User-agent: *",
#     which is why the shared exclusions are repeated below for the
#     explicitly welcomed crawlers.
#   * Within a group the longest matching path wins; on a tie,
#     Allow beats Disallow. Rule order inside a group is irrelevant.
#   * robots.txt is advisory. Tools that ignore it (scanners, CLI
#     clients) must also be blocked at the server/firewall level.
# ==========================================================

# ----------------------------------------------------------
# 🤖 DEFAULT RULES — every crawler not named in a group below
# ----------------------------------------------------------
User-agent: *
Allow: /
# Static assets stay crawlable so pages can be rendered correctly.
Allow: /assets/css/
Allow: /assets/js/
Allow: /assets/vendor/
Allow: /assets/img/
Allow: /*.css$
Allow: /*.js$
# Non-public and server-side paths.
Disallow: /forms/
Disallow: /cgi-bin/
Disallow: /private/
Disallow: /*.php$
Disallow: /thank-you.html
# Tracking / parameterized duplicates of real pages.
Disallow: /*?ceri711=
Disallow: /*?utm_*
# Bait/trap paths (should never be crawled or indexed).
Disallow: /admin-portal-trap/
Disallow: /do-not-index-this-page-403/

# ==========================================================
# 🚫 BLOCKED BOTS: Aggressive SEO Scrapers & No-Value Crawlers
# ==========================================================
# NOTE(review): "Slurp" (Yahoo) was listed both here and in the allowed
# search-engine group. With merged groups the Allow won the tie, so it is
# kept only in the allowed group below — move it back here if Yahoo
# should be blocked.
User-agent: AhrefsBot          # SEO scraper
User-agent: SemrushBot         # SEO tool
User-agent: MJ12bot            # Link indexer
User-agent: DotBot             # Moz crawler
User-agent: Rogerbot           # Moz (legacy)
User-agent: BLEXBot            # Unknown behavior
User-agent: DataForSeoBot      # SERP scraping
User-agent: Barkrowler         # Low-value crawler
User-agent: MauiBot            # Aggressive bandwidth use
User-agent: ZoominfoBot        # B2B lead scraper
User-agent: SISTRIX            # Visibility tool
User-agent: SearchmetricsBot   # SEO monitoring
User-agent: PetalBot           # Huawei crawler
User-agent: AspiegelBot        # Huawei EU
User-agent: Exabot             # No clear value
Disallow: /

# ==========================================================
# 🔒 BLOCKED BOTS: AI Crawlers with No Known Attribution
# ==========================================================
User-agent: Bytespider         # ByteDance/TikTok – no links, unclear training policy
Disallow: /

# ==========================================================
# 🛠 BLOCKED BOTS: CLI Tools, Scrapers, Vulnerability Scanners
# ==========================================================
User-agent: HTTrack            # Website copier
User-agent: wget
User-agent: curl
User-agent: Go-http-client
User-agent: Python-urllib
User-agent: python-requests
User-agent: Scrapy             # Python scraper
User-agent: sqlmap             # SQL injection scanner
User-agent: Nikto              # Web vulnerability scanner
User-agent: WPScan             # WordPress scanner
User-agent: Acunetix           # Security scanner
User-agent: Nmap               # Network scanner
User-agent: ZmEu               # Exploit bot
User-agent: masscan            # Port scanner
Disallow: /

# ==========================================================
# ✅ EXPLICITLY WELCOMED SEO / AI CRAWLERS (Consolidated)
# Same rule set as the default group: because these bots are named,
# they would otherwise skip every shared exclusion (including the
# trap paths) and see only a bare "Allow: /".
# ==========================================================
# ✅ Search Engine Bots
User-agent: Googlebot
User-agent: Bingbot
User-agent: DuckDuckBot
User-agent: AdsBot-Google
User-agent: AdsBot-Google-Mobile
User-agent: Slurp              # Yahoo
# ✅ AI and LLM Crawlers
User-agent: GPTBot             # OpenAI
User-agent: ClaudeBot          # Anthropic
User-agent: anthropic-ai
User-agent: GeminiBot          # Google DeepMind
User-agent: Google-Extended    # Bard/Generative Search
User-agent: Amazonbot          # Alexa AI and others
User-agent: PerplexityBot      # Perplexity AI
User-agent: Applebot           # Apple Search/AI
User-agent: YouBot             # You.com AI
User-agent: NeevaBot           # (Deprecated but still crawls)
Allow: /
Allow: /assets/css/
Allow: /assets/js/
Allow: /assets/vendor/
Allow: /assets/img/
Allow: /*.css$
Allow: /*.js$
Disallow: /forms/
Disallow: /cgi-bin/
Disallow: /private/
Disallow: /*.php$
Disallow: /thank-you.html
Disallow: /*?ceri711=
Disallow: /*?utm_*
Disallow: /admin-portal-trap/
Disallow: /do-not-index-this-page-403/

# ----------------------------------------------------------
# Sitemap
# ----------------------------------------------------------
Sitemap: https://www.argoldsmith.com/sitemap.xml