#User-agent: * #Noindex: /pipermail #Crawl-delay: 5 # #User-agent: AdIdxBot #Disallow: / # # #User-agent: archive.org_bot #Crawl-delay: 10 #Disallow: #Noindex: #Host: linuxmafia.com # # ## 3k of 7.7k bot hits from Ahrefs today ## 13k of 50k over longer period ## https://ahrefs.com/robot/ ## arguably more useful, just too active #User-agent: AhrefsBot #Disallow: / # #User-agent: Applebot #Disallow: / # # #User-agent: AdsBot-Google #Disallow: #Noindex: #Host: linuxmafia.com # # #User-agent: aolbuild #Disallow: / # #User-agent: Ask Jeeves #Disallow: / # #User-agent: baidu #Disallow: / # #User-agent: Baiduspider #Disallow: / # # #User-agent: bingbot #Crawl-delay: 10 #Disallow: #Noindex: #Host: linuxmafia.com # #User-agent: bingpreview #Crawl-delay: 10 #Disallow: #Noindex: #Host: linuxmafia.com # # ## Ignores robots.txt. OK, we'll disallow the sucka in .htaccess #User-agent: BLEXBot #Disallow: / # #User-agent: CCBot #Disallow: / # #User-agent: charlotte #Disallow: / # #User-agent: Cliqzbot #Disallow: / # #User-agent: coccocbot-image #Disallow: / # #User-agent: coccocbot-web #Disallow: / # #User-agent: crawler4j #Disallow: / # #User-agent: Daum #Disallow: / # #User-agent: Download Ninja #Disallow: / # # #User-agent: DuckDuckBot #Crawl-delay: 10 #Disallow: #Noindex: #host: linuxmafia.com # #User-agent: DuckDuckGo-Favicons-Bot #Crawl-delay: 10 #Disallow: #Noindex: #host: linuxmafia.com # # #User-agent: Exabot #Disallow: / # #User-agent: ExtLinksBot #Disallow: / # # ## bad bot, go away ## http://graphicline.co.za/blogs/what-is-ezooms-bot ## 7k hits out of 50k bot hits #User-agent: Ezooms #disallow: / # # #User-agent: facebookexternalhit #Disallow: / # #User-agent: Fetch #Disallow: / # #User-agent: Freshbot #Disallow: / # #User-agent: GarlikCrawler #Disallow: / # #User-agent: Gigabot #Disallow: / # #User-agent: GiHoBBy #Disallow: / # #User-agent: Gogolbot #Disallow: / # #User-agent: Go-http-client #Disallow: / # # #User-agent: Googlebot #Disallow: #Noindex: #Host: 
linuxmafia.com # #User-agent: Googlebot-Mobile #Disallow: #Noindex: #Host: linuxmafia.com # # #User-agent: grub-client #Disallow: / # #User-agent: HTTrack #Disallow: / # #User-agent: HubSpot Links Crawler 2.0 #Disallow: / # # ## Reported to ignore robots.txt, used for Alexa Crawler, later to Internet Archive #User-agent: ia_archiver #Crawl-delay: 10 #Disallow: #Noindex: #Host: linuxmafia.com # # #User-agent: IABTechLab Ads.txt Crawler #Disallow: / # #User-agent: istellabot #Disallow: / # #User-agent: Jeeves #Disallow: / # ## Ignores robots.txt, last checked #User-agent: k2spider #Disallow: / # #User-agent: larbin #Disallow: / # #User-agent: libwww #Disallow: / # #User-agent: linkdexbot #Disallow: / # #User-agent: linko #Disallow: / # #User-agent: ltx71 #Disallow: / # #User-agent: Mail.RU_Bot #Disallow: / # #User-agent: MauiBot #Disallow: / # #User-agent: MBCrawler #Disallow: / # # #User-agent: Mediapartners-Google #Disallow: #Noindex: #Host: linuxmafia.com # # #User-agent: MJ12bot #Disallow: / # #User-agent: MojeekBot #Disallow: / # #User-agent: MSIECrawler #Disallow: / # # #User-agent: msnbot #Crawl-delay: 10 #Disallow: #Noindex: #Host: linuxmafia.com # # #User-agent: MS Search 6.0 Robot #Crawl-delay: 10 #Disallow: #Noindex: #Host: linuxmafia.com # # #User-agent: netseer #Disallow: / # #User-agent: NPBot #Disallow: / # #User-agent: Offline Explorer #Disallow: / # #User-agent: proximic #Disallow: / # #User-agent: psbot #Disallow: / # #User-agent: Qwantify #Disallow: / # #User-agent: Scrapy #Disallow: / # #User-agent: Screaming Frog SEO Spider #Disallow: / # #User-agent: SemrushBot #Disallow: / # #User-agent: SemrushBot-BA #Disallow: / # #User-agent: SEOkicks #Disallow: / # #User-agent: SeznamBot #Disallow: / # #User-agent: sitecheck.internetseer.com #Disallow: / # #User-agent: SiteSnagger #Disallow: / # # #User-agent: Slurp #Crawl-delay: 10 #Disallow: #Noindex: #Host: linuxmafia.com # # #User-agent: Sogou web spider #Disallow: / # #User-agent: Steeler #Disallow: / # 
#User-agent: Teleport #Disallow: / # #User-agent: Teoma #Disallow: / # #User-agent: The Knowledge AI #Disallow: / # #User-agent: tracemyfile #Disallow: / # #User-agent: Twitterbot #Disallow: / # #User-agent: UbiCrawler #Disallow: / # #User-agent: Uptimebot #Disallow: / # #User-agent: Vagabondo #Disallow: / # #User-agent: WebCopier #Disallow: / # #User-agent: WebReaper #Disallow: / # #User-agent: WebStripper #Disallow: / # #User-agent: WebZIP #Disallow: / # #User-agent: WikiDo #Disallow: / # #User-agent: Xenu #Disallow: / # #User-agent: Yahoo! Slurp #Disallow: / # #User-agent: yandex #Disallow: / # #User-agent: YandexBot #Disallow: / # #User-agent: YisouSpider #Disallow: / # #User-agent: Zealbot #Disallow: / # #User-agent: ZyBORG #Disallow: / # #