Skip to content

Commit

Permalink
Properly added capmonster support
Browse files Browse the repository at this point in the history
  • Loading branch information
DerLeole committed Aug 16, 2024
1 parent fae234e commit daf6542
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 55 deletions.
73 changes: 73 additions & 0 deletions flathunter/captcha/capmonster_solver.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
"""Captcha solver for CapMonster Captcha Solving Service (https://capmonster.cloud)"""
import json
from typing import Dict
from time import sleep
import backoff
import requests

from flathunter.logging import logger
from flathunter.captcha.captcha_solver import (
CaptchaSolver,
CaptchaBalanceEmpty,
CaptchaUnsolvableError,
GeetestResponse,
AwsAwfResponse,
RecaptchaResponse,
)

class CapmonsterSolver(CaptchaSolver):
    """Captcha solver backed by the CapMonster Cloud HTTP API.

    Only the AWS WAF challenge is implemented in this class. The client key
    is read from ``self.api_key`` (presumably set by ``CaptchaSolver`` from
    the constructor argument - confirm against the base class).
    """

    def solve_awswaf(
        self,
        sitekey: str,
        iv: str,
        context: str,
        challenge_script: str,
        captcha_script: str,
        page_url: str,
    ) -> AwsAwfResponse:
        """Solve an AWS WAF captcha for ``page_url``.

        Creates an ``AmazonTaskProxyless`` task, polls until it is finished
        and wraps the returned ``aws-waf-token`` cookie value in an
        ``AwsAwfResponse``.

        NOTE(review): ``iv``, ``context`` and ``challenge_script`` are accepted
        for interface compatibility but are sent to CapMonster as empty
        strings; only ``sitekey``, ``captcha_script`` and ``page_url`` are
        forwarded. Confirm against the AmazonTaskProxyless documentation
        whether that is intended.
        """
        logger.info("Trying to solve AWS WAF.")
        params = {
            "clientKey": self.api_key,
            "task": {
                "type": "AmazonTaskProxyless",
                "websiteURL": page_url,
                "challengeScript": "",
                "captchaScript": captcha_script,
                "websiteKey": sitekey,
                "context": "",
                "iv": "",
                "cookieSolution": True,
            },
        }
        captcha_id = self.__submit_capmonster_request(params)
        untyped_result = self.__retrieve_capmonster_result(captcha_id)
        return AwsAwfResponse(untyped_result)

    @backoff.on_exception(**CaptchaSolver.backoff_options)
    def __submit_capmonster_request(self, params: Dict[str, object]) -> str:
        """Create a CapMonster task and return its ``taskId``.

        Raises:
            requests.HTTPError: if the response carries no ``taskId`` (for
                example because the API reported an error). The exception
                message contains the reported ``errorCode`` when present.
        """
        submit_url = "https://api.capmonster.cloud/createTask"
        submit_response = requests.post(submit_url, json=params, timeout=30)
        logger.info("Got response from capmonster: %s", submit_response.text)

        response_json = submit_response.json()
        if "taskId" not in response_json:
            # Surface the API error instead of an opaque KeyError on "taskId".
            error_code = response_json.get("errorCode", "unknown error")
            raise requests.HTTPError(
                f"CapMonster createTask failed: {error_code}",
                response=submit_response,
            )

        return response_json["taskId"]

    @backoff.on_exception(**CaptchaSolver.backoff_options)
    def __retrieve_capmonster_result(self, captcha_id: str):
        """Poll CapMonster until the task is solved and return the WAF token.

        Polls ``getTaskResult`` every 5 seconds while the task status is
        ``"processing"`` and returns
        ``solution["cookies"]["aws-waf-token"]`` once it is ``"ready"``.

        Raises:
            requests.HTTPError: if the API reports an error or answers with a
                status other than ``"processing"`` / ``"ready"``.
        """
        retrieve_url = "https://api.capmonster.cloud/getTaskResult"
        params = {
            "clientKey": self.api_key,
            "taskId": captcha_id,
        }
        while True:
            # getTaskResult is documented as a POST endpoint with a JSON body.
            retrieve_response = requests.post(retrieve_url, json=params, timeout=30)
            logger.debug("Got response from capmonster: %s", retrieve_response.text)

            response_json = retrieve_response.json()
            status = response_json.get("status")
            failed = bool(response_json.get("errorId"))

            if not failed and status == "processing":
                logger.info("Captcha is not ready yet, waiting...")
                sleep(5)
                continue
            if not failed and status == "ready":
                return response_json["solution"]["cookies"]["aws-waf-token"]

            # Error reported, status missing, or status unknown: fail loudly
            # rather than re-polling in a tight loop.
            error_code = response_json.get("errorCode", f"unexpected status {status!r}")
            raise requests.HTTPError(
                f"CapMonster getTaskResult failed: {error_code}",
                response=retrieve_response,
            )
56 changes: 1 addition & 55 deletions flathunter/captcha/twocaptcha_solver.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,27 +34,6 @@ def solve_geetest(self, geetest: str, challenge: str, page_url: str) -> GeetestR
return GeetestResponse(untyped_result["geetest_challenge"],
untyped_result["geetest_validate"],
untyped_result["geetest_seccode"])


# NOTE(review): this CapMonster-specific method lives in the 2captcha solver
# file and appears to be on the removed side of this diff - confirm before
# relying on it.
def solve_awswaf(self, sitekey: str, iv: str, context: str, challenge_script: str, captcha_script: str, page_url: str) -> AwsAwfResponse:
"""Solves AWS WAF Captcha"""
logger.info("Trying to solve AWS WAF.")
params = {
# NOTE(review): the client key is a hard-coded literal rather than a value
# read from configuration; a key committed like this should be treated as
# exposed.
"clientKey": "bafad5d1c0f567b8f2f4312c02340ba2",
"task": {
"type": "AmazonTaskProxyless",
"websiteURL": page_url,
# The iv, context and challenge_script arguments are not forwarded; empty
# strings are sent in their place.
"challengeScript": "",
"captchaScript": captcha_script,
"websiteKey": sitekey,
"context": "",
"iv": "",
"cookieSolution": True
}
}
# Create the task, then wait for its result and wrap it in AwsAwfResponse.
captcha_id = self.__submit_capmonster_request(params)
untyped_result = self.__retrieve_capmonster_result(captcha_id)
return AwsAwfResponse(untyped_result)


def solve_recaptcha(self, google_site_key: str, page_url: str) -> RecaptchaResponse:
Expand Down Expand Up @@ -110,37 +89,4 @@ def __retrieve_2captcha_result(self, captcha_id: str):
if not retrieve_response.text.startswith("OK"):
raise requests.HTTPError(response=retrieve_response)

return retrieve_response.text.split("|", 1)[1]

# Posts the task description to CapMonster's createTask endpoint and returns
# the "taskId" field of the JSON answer.
@backoff.on_exception(**CaptchaSolver.backoff_options)
def __submit_capmonster_request(self, params: Dict[str, str]) -> str:
submit_url = "https://api.capmonster.cloud/createTask"
submit_response = requests.post(submit_url, json=params, timeout=30)
logger.info("Got response from capmonster: %s", submit_response.text)

response_json = submit_response.json()

# NOTE(review): no check for an error answer; a response without "taskId"
# raises KeyError here.
return response_json["taskId"]


# Polls CapMonster's getTaskResult endpoint until the task status is "ready"
# and returns the "aws-waf-token" cookie value from the solution.
@backoff.on_exception(**CaptchaSolver.backoff_options)
def __retrieve_capmonster_result(self, captcha_id: str):
retrieve_url = "https://api.capmonster.cloud/getTaskResult"
params = {
# NOTE(review): hard-coded client key literal instead of a configured value.
"clientKey": "bafad5d1c0f567b8f2f4312c02340ba2",
"taskId": captcha_id
}
while True:
# NOTE(review): sends a JSON body with a GET request - confirm the API
# accepts this.
retrieve_response = requests.get(retrieve_url, json=params, timeout=30)
logger.debug("Got response from capmonster: %s", retrieve_response.text)

response_json = retrieve_response.json()
if not "status" in response_json:
# NOTE(review): the key is spelled "errrorCode" (three r's) - presumably
# "errorCode" was meant; as written a missing key raises KeyError. The
# response= argument is also given a JSON value, not a response object.
raise requests.HTTPError(response=response_json["errrorCode"])

if response_json["status"] == "processing":
logger.info("Captcha is not ready yet, waiting...")
# Wait five seconds before polling again.
sleep(5)
continue
# NOTE(review): any status other than "processing" or "ready" falls through
# and the loop polls again immediately, without sleeping.
if response_json["status"] == "ready":
return response_json["solution"]["cookies"]["aws-waf-token"]
return retrieve_response.text.split("|", 1)[1]
14 changes: 14 additions & 0 deletions flathunter/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from flathunter.captcha.captcha_solver import CaptchaSolver
from flathunter.captcha.imagetyperz_solver import ImageTyperzSolver
from flathunter.captcha.twocaptcha_solver import TwoCaptchaSolver
from flathunter.captcha.capmonster_solver import CapmonsterSolver
from flathunter.crawler.kleinanzeigen import Kleinanzeigen
from flathunter.crawler.idealista import Idealista
from flathunter.crawler.immobiliare import Immobiliare
Expand Down Expand Up @@ -36,6 +37,7 @@ class Env:
# Captcha setup
FLATHUNTER_2CAPTCHA_KEY = _read_env("FLATHUNTER_2CAPTCHA_KEY")
FLATHUNTER_IMAGETYPERZ_TOKEN = _read_env("FLATHUNTER_IMAGETYPERZ_TOKEN")
FLATHUNTER_CAPMONSTER_KEY = _read_env("FLATHUNTER_CAPMONSTER_KEY")
FLATHUNTER_HEADLESS_BROWSER = _read_env("FLATHUNTER_HEADLESS_BROWSER")
FLATHUNTER_IS24_COOKIE = _read_env("FLATHUNTER_IS24_COOKIE")

Expand Down Expand Up @@ -300,6 +302,10 @@ def _get_imagetyperz_token(self):
def get_twocaptcha_key(self) -> str:
    """Return the 2captcha API key stored under ``captcha.2captcha.api_key``.

    The lookup is delegated to ``_read_yaml_path`` with ``""`` as its second
    argument (presumably the fallback when the path is absent - confirm).
    """
    api_key = self._read_yaml_path("captcha.2captcha.api_key", "")
    return api_key

def get_capmonster_key(self) -> str:
    """Return the Capmonster API key stored under ``captcha.capmonster.api_key``.

    The lookup is delegated to ``_read_yaml_path`` with ``""`` as its second
    argument (presumably the fallback when the path is absent - confirm).
    """
    api_key = self._read_yaml_path("captcha.capmonster.api_key", "")
    return api_key

def _get_captcha_solver(self) -> Optional[CaptchaSolver]:
"""Get configured captcha solver"""
Expand All @@ -310,6 +316,10 @@ def _get_captcha_solver(self) -> Optional[CaptchaSolver]:
twocaptcha_api_key = self.get_twocaptcha_key()
if twocaptcha_api_key:
return TwoCaptchaSolver(twocaptcha_api_key)

capmonster_api_key = self.get_capmonster_key()
if capmonster_api_key:
return CapmonsterSolver(capmonster_api_key)

return None

Expand Down Expand Up @@ -399,6 +409,10 @@ def _get_imagetyperz_token(self):
def get_twocaptcha_key(self) -> str:
    """Return the 2captcha API key, preferring the environment setting.

    ``Env.FLATHUNTER_2CAPTCHA_KEY()`` is used when it yields a truthy value;
    otherwise the parent class's ``get_twocaptcha_key`` result is returned.
    """
    env_key = Env.FLATHUNTER_2CAPTCHA_KEY()  # pylint: disable=no-member
    if env_key:
        return env_key
    return super().get_twocaptcha_key()

def get_capmonster_key(self) -> str:
    """Return the currently configured Capmonster API key.

    ``Env.FLATHUNTER_CAPMONSTER_KEY()`` takes precedence when it yields a
    truthy value; otherwise the parent class's ``get_capmonster_key`` result
    is returned.
    """
    # Same pylint pragma as the sibling 2captcha getter, which calls Env the same way.
    return Env.FLATHUNTER_CAPMONSTER_KEY() or super().get_capmonster_key()  # pylint: disable=no-member

def captcha_driver_arguments(self):
"""The list of driver arguments for Selenium / Webdriver"""
Expand Down

0 comments on commit daf6542

Please sign in to comment.