Saya mencoba melakukan scraping (mengambil data) dari situs web ini:

https://www.realestate.com.au/find-agent/victoria-park-wa-6100?page=1&source=results

Tetapi ketika saya mengirim permintaan ke tautan ini, server mengembalikan respons 429 (Too Many Requests). Mohon bantuannya, adakah yang bisa membantu saya menyelesaikan masalah ini?

Kode saya:

import requests


# Request headers as posted in the question.
# NOTE(review): 'authority', 'method', 'path' and 'scheme' look like HTTP/2
# pseudo-header fields (:authority, :method, :path, :scheme) with the leading
# colon dropped -- presumably copied from browser dev tools. Passed here they
# are sent as ordinary header fields. 'path' also says page=2 while `url`
# below requests page=1. They are kept as posted.
# NOTE(review): the cookie is a captured browser session pasted verbatim.
headers = {
    'authority':'www.realestate.com.au',
    'method':'GET',
    'path':'/find-agent/victoria-park-wa-6100?page=2&source=results',
    'scheme':'https',
    'accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'accept-encoding':'gzip, deflate, br',
    'accept-language':'en-US,en;q=0.9',
    'cache-control':'max-age=0',
    # The cookie value was broken across two physical lines inside one quoted
    # literal, which is a syntax error. Two adjacent literals are concatenated
    # by Python, so the value is unchanged.
    'cookie':(
        'reauid=97fa56b87e4a0000f2385760e50300009f5d0f00; mid=14438305973304081772; s_vi=[CS]v1|302B9C7D549B351B-400019144A08CC6D[CE]; s_ecid=MCMID%7C41762198941431542153915924747747631103; VT_LANG=language%3Den-US; _fbp=fb.2.1616328959328.1411085120; mako_fpc_id=f3b48060-6ca0-468b-8417-5c4331844f22; s_nr=1616329068252; optimizelyEndUserId=oeu1616329142109r0.5813387456564119; QSI_SI_41tFIUPeSKv873L_intercept=true; Country=PK; KP_UID=d67a8604-7a55-ac63-2939-33605bab499e; AWSELB=BD21ABD912FD962534A86FF37C471AF8CEA612D2DA2EA79D0C4C2C0C12582F925E886BFF2B41577284C9C9332FB9815B2C31177FDADF0414684498FC6684185E03DAFE8405; AWSELBCORS=BD21ABD912FD962534A86FF37C471AF8CEA612D2DA2EA79D0C4C2C0C12582F925E886BFF2B41577284C9C9332FB9815B2C31177FDADF0414684498FC6684185E03DAFE8405; AMCVS_341225BE55BBF7E17F000101%40AdobeOrg=1; _stc=typedBookmarked; s_cc=true; _gid=GA1.3.18226979.1617461578; id5id.1st_last=1617461596633; id5id.1st=%20%7B%20%22created_at%22%3A%20%222021-03-21T12%3A16%3A10Z%22%2C%20%22id5_consent%22%3A%20true%2C%20%22original_uid%22%3A%20%22ID5-ZHMOgkOOAbPohthleKT8pkJQyU4qgaQLzVhn2vjkdQ!%22%2C%20%22universal_uid%22%3A%20%22ID5-ZHMOlX7LDRWCg5thfmHmJYTpjMpYoxNkps0RO4mHMg!%22%2C%20%22signature%22%3A%20%22ID5_AVH0GTjpdbBHqWV59BP1-i8JEjSp8aybqVzONYmXhddYziHVh_rLdvcsTPnzZz3HIs2ri9B26KW9Z91dH2DMfnU%22%2C%20%22link_type%22%3A%202%2C%20%22cascade_needed%22%3A%20true%7D; id5id.1st_123_nb=0; AMCV_341225BE55BBF7E17F000101%40AdobeOrg=-330454231%7CMCIDTS%7C18721%7CMCMID%7C41762198941431542153915924747747631103%7CMCAAMLH-1618132227%7C3%7CMCAAMB-1618132227%7CRKhpRz8krg2tLO6pguXWp5olkAcUniQYPHaMWWgdJ3xzPWQmdj0y%7CMCOPTOUT-1617534627s%7CNONE%7CMCAID%7C302B9C7D549B351B-400019144A08CC6D%7CvVersion%7C3.1.2; _sp_ses.2fe7=*; _ga=GA1.3.1408343425.1616328955; QSI_HistorySession=https%3A%2F%2Fwww.realestate.com.au%2Ffind-agent%2Fvictoria-park-wa-6100%3Fpage%3D1%26source%3Dresults~1617527430603; '
        'KP_UIDz=F4XHgsQV9c%2FTQzll5Ndulg%3D%3D%3A%3A5IsfhjHoNmnkht19ND97crnQoMskUJ6aIoCOa8EPwoXMwlMMS47PUmEZSHeeyPI83MXIH%2F9C%2BxMMGCWyl0ApKdv616egyT7xhhLGmw2jal4LA1Ml3dHb8uhosY7j1eXPCWC1bL%2FCWgdwrt5tPoarmvSfzz7SLbZep6ETdU83TKp%2BVqcFobRaam91qFzw02WRwiEQtgeBKGo2cnE9PzRdGTtYhWFAT6TZeKjmeSQOTsOftptuJO%2B833w7vUsSQkIM5zxsk1Qb8sZJnhBokdV8sbdyRu39hKFppEbDHLY%2BnItOkrNGBraYsgNgg%2BLFGEchLvHFtfCfCXzugFW9f5piym0247JXapdyez5mwoVbuW444Eefc5i7MOAsXxEjVa%2BBtK0%2BqVAma5QbOF6McCBMgMzZvvKkd4LNCNYGOXoNc9zvACln1vSbJy7XlKQMmGcajYV1tRuifffi%2BGwRWWeQ%2FTd6eWVarXc48n7seoWl80mYX6LK7HCXoXq0BH5X%2BiTjT%2BSKwK7w%2FqMXQcgt%2FfErDnZhEr2auXw%2FnGA0reE8SyU%3D; pageview_counter.srs=6; s_sq=%5B%5BB%5D%5D; _sp_id.2fe7=c11fc61a-511f-444b-b13d-e6f2c8fb6b8a.1616328956.10.1617527442.1617468348.ab95c4fd-1806-47b2-a4b5-aad4a1e4103f; _ga_F962Q8PWJ0=GS1.1.1617527427.11.1.1617527441.0; utag_main=v_id:017854b683fe0022250ca432779003072002806a00ac2$_sn:10$_ss:0$_st:1617529241850$vapi_domain:realestate.com.au$dc_visit:10$ses_id:1617527426094%3Bexp-session$_pn:1%3Bexp-session$dc_event:2%3Bexp-session$dc_region:ap-southeast-2%3Bexp-session; External=%2FAPPNEXUS%3D4266307067876008443%2FCASALE%3DX5bknF4BHoabg%252EzfklIojAAA%2526910%2FPUBMATIC%3DE3F1E12A-9392-415E-BFA3-8E9A2D3A6383%2FRUBICON%3DKGQO4LPG-4-7Y1I%2FTRIPLELIFT%3D160265391696123554%2F_EXP%3D1649063429%2F_exp%3D1649063441'
    ),
    'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36'
}

url = 'https://www.realestate.com.au/find-agent/victoria-park-wa-6100?page=1&source=results'
# requests applies no timeout unless one is given, so a stalled connection
# would block forever; bound the wait explicitly.
r = requests.get(url, headers=headers, timeout=30)


# The question reports that this prints 429.
print(r.status_code)

Keluaran:

429
0
Malik Ibrahim 4 April 2021, 12:34

1 jawaban

Jawaban Terbaik

Ketika Anda melakukan pencarian di browser biasa, Anda juga akan melihat respons 429 di tab Jaringan (alat pengembang, F12). Saya tidak tahu bagaimana Anda menyusun header tersebut dan dari mana Anda mendapatkannya, tetapi cookie sangat penting di sini, terutama item "KP_UIDz" dan "KP_UID".

Kode berikut ini bekerja untuk saya:

import requests as rq
from bs4 import BeautifulSoup as bs


# Fetch the first page of the agent listing and parse it into `soup`.
url = 'https://www.realestate.com.au/find-agent/victoria-park-wa-6100?page=1&source=results'
s = rq.Session()

# Browser-like (Firefox) request headers. The Cookie entry carries the
# "KP_UIDz" and "KP_UID" items that the accompanying answer singles out as
# essential; the values below are the ones captured by the answer's author.
headers = {
    "Host": "www.realestate.com.au",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate, br",
    "Connection": "keep-alive",
    "Referer": "https://www.realestate.com.au/find-agent/victoria-park-wa-6100",
    "Cookie":  "KP_UIDz=fxINWtvcCUSmOElOEVDUtA%3D%3D%3A%3ATttOpt%2FDO7HlYzZedmreLNB%2FZ39%2FHjUPNaT%2FA%2BLweO7JKXJP7SYoAD6VTztnQDRGWRpaoBGCLLYczrKHZ00KZo%2BthnlfT7c5f3SAFCsxwf3%2FeOY15LXdfeymzsR7BKd%2Buwh21Hf5p8XlSs%2BsmXYy47G4Kw3S9ennT2q3nXXz8gXWlqX8neOhg1hDuV%2BB%2BH5WZoXPPwqcnR3HLfv5IHhN4Ou%2BaS6n1hkjhGv8mN6ku4JlAyY%2BoVqYVESwZxgbxF09zegcWVBAC1vtZv8sdDzkVfkeYW4j7zcVsK6UeDrBB8fOH0eIngTtulnYT25cI9r%2FpgBBtWrygLrprmAxpDBskwaRduJcTt0Cud7xp8YuPpTA1wbxCq5UyAaI1HoKPJtuvikA28pmDV79Udv6lwD6z48WfyfnUl9RIO1f1xeHkcKvOLDcd74GUEg3A6x8HuIVqaIfQd%2FcQhOJoWEQG2vTbSe04r9gRHZgWnV4IZOPPatAkAqvMnGBrSZsWfoV4AXT;  KP_UID=b89f9896-2bd6-148c-7f02-c5abee0cc14e",
    "Upgrade-Insecure-Requests": "1",
    "Pragma": "no-cache",
    "Cache-Control": "no-cache",
    "TE": "Trailers",
}

# requests applies no timeout unless one is given, so a stalled connection
# would block forever; bound the wait explicitly.
q = s.get(url, headers=headers, timeout=30)
print(q.status_code)  # the answer's author reports 200 here
soup = bs(q.content, "lxml")

Sebagai bukti, bagian parsing berikut mengembalikan data untuk setiap agen:

def _class_prefix(prefix):
    """Return a BeautifulSoup ``class_`` filter matching classes that start with *prefix*."""
    # `value` is None for tags without a class attribute, hence the guard.
    return lambda value: value and value.startswith(prefix)


# One <div> per agent card; classes are matched by prefix.
rr = soup.find_all("div", class_=_class_prefix("agent-card__details"))

# Each entry is [agent_name, (label, value), (label, value), ...].
full_data = []
for card in rr:
    labels = [tag.text for tag in card.find_all("div", class_=_class_prefix("key-feature__label"))]
    vals = [tag.text for tag in card.find_all("div", class_=_class_prefix("key-feature__value"))]

    # find() returns the first match or None, so a card without a name div
    # is recorded with None instead of aborting the loop with IndexError.
    name_tag = card.find("div", class_="agent-profile__name")
    name = name_tag.text if name_tag is not None else None

    # zip() pairs labels with values positionally and stops at the shorter list.
    full_data.append([name, *zip(labels, vals)])

"full_data" berisi:

[['Edward Lim',
  ('Properties sold(as lead agent)', '24'),
  ('Median sold price', '$420k'),
  ('Median days advertised', '68'),
  ('PropertiessoldProperties sold', '46')],
 ['Lee & Derek Baston',
  ('Properties sold(as lead agent)', '21'),
  ('Median sold price', '$655k'),
  ('Median days advertised', '41'),
  ('PropertiessoldProperties sold', '48')],
 ['Fulton Borthwick',
  ('Properties sold(as lead agent)', '7'),
  ('Median sold price', '$515k'),
  ('Median days advertised', '71'),
  ('PropertiessoldProperties sold', '18')],
....
....
1
ce.teuf 5 April 2021, 15:39