2018-10-08 03:08:35 +00:00
|
|
|
import json
import re
import urllib
import urllib.parse

import requests
from lxml import html
|
|
|
|
|
|
|
|
class Result:
    """A single entry scraped from a search results page.

    The text fields are coerced with ``str`` and the peer counts with
    ``int`` when the object is built, so ``int`` may raise ``ValueError``
    (or ``TypeError``) for counts that are not numeric.
    """

    def __init__(self, title, seeders, leechers, size, author, url):
        """Store the scraped fields, normalising their types."""
        self.title = str(title)
        self.seeders = int(seeders)
        self.leechers = int(leechers)
        self.size = str(size)
        self.author = str(author)
        self.url = str(url)

    def __str__(self):
        """Return the entry serialised as a JSON object string."""
        # Key order is part of the emitted text, so it is kept fixed.
        payload = {
            'title': self.title,
            'seeders': self.seeders,
            'leechers': self.leechers,
            'size': self.size,
            'author': self.author,
            'url': self.url,
        }
        return json.dumps(payload)
|
|
|
|
|
|
|
|
class Scraper:
    """Search a proxy site and scrape its HTML pages for torrent data.

    ``config`` must expose a ``proxy`` attribute holding the host that is
    placed after ``https://`` when request URLs are built.
    """

    def __init__(self, config):
        """Create the HTTP client and remember the configuration."""
        # The request client is a requests.Session even though no
        # logged-in "user session" is needed.
        self.client = requests.Session()
        self.config = config

    def craft_url(self, protocol, proxy, search_terms):
        """Build the search URL for ``search_terms``.

        Args:
            protocol: URL scheme, e.g. ``"https"``.
            proxy: Host serving the search page.
            search_terms: Free-text query; it is URL-encoded here.

        Returns:
            The complete search URL as a string. The URL is also printed
            to stdout.
        """
        # e.g. https://pirate.blue/s/?q=Raising+Arizona&category=0&page=0&orderby=99
        query = {'q': search_terms, 'category': 0, 'page': 0, 'orderby': 99}
        url = "{0}://{1}/s/?{2}".format(
            protocol, proxy, urllib.parse.urlencode(query)
        )
        print(url)
        return url

    def get_results(self, search_terms):
        """Fetch the search page for ``search_terms`` and parse it.

        Returns:
            A list of ``Result`` objects, one per parsed result row.
        """
        url = self.craft_url("https", self.config.proxy, search_terms)
        response = self.client.get(url)
        return self.Parser.scrape("results_list", response.content)

    def get_magnet(self, url):
        """Fetch a detail page and return the download link found on it.

        Args:
            url: Path of the detail page; it is appended to
                ``https://<config.proxy>``.

        Raises:
            IndexError: If the page contains no download link.
        """
        response = self.client.get("https://" + self.config.proxy + url)
        return self.Parser.scrape("magnet_link", response.content)

    class Parser:
        """Static helpers that extract data points from fetched HTML."""

        # Captures the text between "Size " and the next comma.
        _SIZE_PATTERN = re.compile(r'Size (.+?),')

        @staticmethod
        def scrape(datapoint, text):
            """Dispatch ``text`` to the parser registered for ``datapoint``.

            Raises:
                KeyError: If ``datapoint`` is not a known parser name.
            """
            cases = {
                "results_list": Scraper.Parser.results_list,
                "magnet_link": Scraper.Parser.magnet_link,
            }
            return cases[datapoint](text)

        @staticmethod
        def _first_text(nodes):
            """Return the first item of an xpath result, or ``""`` if empty."""
            return nodes[0] if nodes else ""

        @staticmethod
        def _extract_size(description):
            """Return the size quoted in ``description``, or ``""`` if absent."""
            match = Scraper.Parser._SIZE_PATTERN.search(description)
            return match.group(1) if match else ""

        @staticmethod
        def results_list(text):
            """Parse a search results page into a list of ``Result`` objects.

            Rows of the ``searchResult`` table that lack a seeders cell, a
            leechers cell or a detail link are skipped instead of aborting
            the whole parse.
            """
            first_text = Scraper.Parser._first_text
            page = html.fromstring(text)
            rows = page.xpath('//table[@id="searchResult"]/tr')

            results_buffer = []
            for tr in rows:
                seeders = tr.xpath('td[3]/text()')
                leechers = tr.xpath('td[4]/text()')
                url = tr.xpath(
                    'td/div[@class="detName"]/a[@class="detLink"]/@href'
                )
                if not (seeders and leechers and url):
                    # Presumably a layout row (e.g. pagination) rather than
                    # a result entry -- TODO confirm against the live page.
                    continue

                # Unwrap the xpath lists so Result stores the text itself
                # rather than the repr of a list.
                title = first_text(tr.xpath('td[2]/div[1]/a[1]/text()'))
                author = first_text(tr.xpath('td[2]/font/a/text()'))

                # Always bind size for this row: a missing match must not
                # raise NameError or reuse the previous row's value.
                description = first_text(tr.xpath('td[2]/font/text()'))
                size = Scraper.Parser._extract_size(description)

                results_buffer.append(
                    Result(title, seeders[0], leechers[0], size, author, url[0])
                )

            return results_buffer

        @staticmethod
        def magnet_link(text):
            """Return the first link inside ``div.download`` of a page.

            Raises:
                IndexError: If no such link is present.
            """
            link_page = html.fromstring(text)
            hrefs = link_page.xpath('//div[@class="download"]/a/@href')
            return hrefs[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class SessionError(Exception):
    """Exception that carries its payload in the ``value`` attribute."""

    def __init__(self, value):
        """Keep ``value`` both as the exception argument and as ``self.value``."""
        super().__init__(value)
        self.value = value
|
|
|
|
|