# robots.txt for https://www.simplifiedcomputers.com
#
# Format notes:
#   - One directive per line; a group starts at a "User-agent:" line and
#     runs until the next "User-agent:" line.
#   - A crawler obeys only the single most specific group that matches its
#     name. It falls back to the "*" group only when no named group matches.
#   - "Sitemap:" is independent of any group.

##############
# custom     #
##############

# Default group: applies to every crawler that does NOT have its own named
# group further down in this file (for example Bingbot).
# Googlebot, Googlebot-Image and SiteAuditBot have their own groups below,
# so they do not read the rules in this group.
User-agent: *
Allow: /social_instagram/
# Disabled rules for sorted listing URLs. If re-enabled they must stay
# inside this "*" group to take effect.
#Disallow: /*?sort=*
#Disallow: /*&sort=*

Sitemap: https://www.simplifiedcomputers.com/sitemap.xml

# Block specific crawlers used for AI training or aggressive scraping.

# --- Category: AI & Large-Scale Data Crawlers ---
User-agent: GPTBot
Disallow: /

User-agent: Google-Extended
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: anthropic-ai
Disallow: /

User-agent: Omgilibot
Disallow: /

User-agent: omgili
Disallow: /

# --- Category: SEO & Marketing Tool Crawlers ---
User-agent: AhrefsBot
Disallow: /

User-agent: MJ12bot
Disallow: /

User-agent: DotBot
Disallow: /

User-agent: SEOkicks
Disallow: /

User-agent: serpstatbot
Disallow: /

# --- Category: Non-Target-Market Search Engines & Scrapers ---
User-agent: Yandex
Disallow: /

User-agent: Baiduspider
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: PetalBot
Disallow: /

User-agent: Scrapy
Disallow: /

User-agent: HTTrack
Disallow: /

# Specifically allow SemRush SiteAuditBot
User-agent: SiteAuditBot
Allow: /

# Explicitly allow Google's web and image crawlers site-wide.
User-agent: Googlebot
Allow: /

User-agent: Googlebot-Image
Allow: /