
# How to Detect and Handle Rate Limiting in Web Scraping
Rate limiting is the top scraping obstacle. Here's how to detect and handle it.

## Detection

```python
import requests, time, random
from collections import deque

class RateLimitDetector:
    def __init__(self):
        self.times = deque(maxlen=100)
        self.baseline = None

    def check(self, resp):
        sig = {"limited": False, "reasons": [], "delay": 0}
        if resp.status_code == 429:
            sig.update(limited=True, delay=int(resp.headers.get("Retry-After", 60)))
            sig["reasons"].append("HTTP 429")
        if resp.status_code == 503:
            sig.update(limited=True, delay=30)
            sig["reasons"].append("HTTP 503")
        if resp.status_code == 200:
            for w in ["captcha", "recaptcha", "unusual traffic", "automated requests"]:
                if w in resp.text.lower():
                    sig.update(limited=True, delay=60)
                    sig["reasons"].append(f"Soft block: {w}")
                    break
        self.times.append(resp.elapsed.total_seconds())
        if len(self.times) > 10:
            if not s  # [excerpt truncated here in the original]
```
Continue reading the full tutorial on Dev.to
Opens in a new tab



![[MM’s] Boot Notes — The Day Zero Blueprint — Test Smarter on Day One](/_next/image?url=https%3A%2F%2Fcdn-images-1.medium.com%2Fmax%2F1368%2F1*AvVpFzkFJBm-xns4niPLAA.png&w=1200&q=75)