Конкретно мне нужно понять, как именно устроена программа и за что отвечает каждая её строка или группа строк. Т.е. мне нужно разъяснение того, как программа работает. Люди добрые, программисты, помогите, кто чем может. Текст программы:
import requests
import json
from re import compile, search
from configparser import ConfigParser
from bs4 import BeautifulSoup
class GetAllBase(object):
    """Scrape the lot table of bepspb.ru page by page into ``rez.json``.

    Every lot whose title yields an address, an area or a cadastral number
    is appended to the output file as one JSON object per line.  The form
    fields needed for paging (``__EVENTTARGET``, ``__EVENTVALIDATION``,
    ``__CVIEWSTATE``) are seeded from ``config.conf`` and refreshed from
    each response.

    NOTE(review): the pasted source had lost its indentation, its ASCII
    quotes and everything that was written inside square brackets
    (subscripts, regex character classes, a list expression).  Those pieces
    are reconstructed below and individually marked ``NOTE(review)``;
    confirm them against the original script.
    """

    def __init__(self):
        """Load settings and open ``rez.json`` for appending."""
        self.url = ''
        self._settings()
        # Explicit UTF-8: records are dumped with ensure_ascii=False, so the
        # platform default encoding could fail on Cyrillic text.
        self.file = open('rez.json', 'a', encoding='utf-8')
        # True while the last fetched page suggests there is more to fetch.
        self.need_more = False

    def _settings(self):
        """Read ``config.conf`` and prepare headers, form data and regexes.

        Raises:
            configparser.Error: if the ``Main`` section or one of the three
                form-state options is missing from ``config.conf``.
        """
        config = ConfigParser()
        config.read('config.conf')
        self.headers = {
            'Host': 'bepspb.ru',
            'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:57.0) Gecko/20100101 Firefox/57.0',
            'Referer': 'http://bepspb.ru/',
            'X-Requested-With': 'XMLHttpRequest',
            'X-MicrosoftAjax': 'Delta=true',
        }
        self.data = {
            '__EVENTTARGET': config.get('Main', '__EVENTTARGET'),
            '__EVENTVALIDATION': config.get('Main', '__EVENTVALIDATION'),
            '__CVIEWSTATE': config.get('Main', '__CVIEWSTATE'),
        }
        self._compile_patterns()

    def _compile_patterns(self):
        """Compile the regular expressions applied to lot links and titles."""
        # Numeric lot id: the last path segment of the lot link.
        self._id = compile(r'/(\d+)/$')
        # NOTE(review): the original address pattern was cut off after
        # "(?:адрес?" in the paste.  This stand-in captures the text that
        # follows "адрес…" up to the next ';' (get_items() turns '\r' into
        # ';').  Replace it with the real pattern.
        self.address = compile(r'(?:адрес\w*[:\s]+)([^;]+)')
        # NOTE(review): the paste showed "(+)РєРІ?" - the bracketed classes
        # were stripped and "кв" was mis-decoded.  The character class is
        # inferred from the post-processing in _parse_title() (spaces are
        # removed, '.' becomes ','); confirm.
        self.area = compile(r'([\d .,]+)кв[.]?')
        # Cadastral number in the colon-separated form NN:NN:N…:N….
        self.cadastre = compile(r'\d{2}:\d{2}:\d{1,7}:\d+')

    def _select_page(self, page):
        """Point the ``__EVENTTARGET`` form field at pager entry *page*.

        NOTE(review): the paste read ``self.data = self.data + ...`` with the
        subscripts stripped.  Replacing the last two characters of the target
        with the zero-padded page number is a reconstruction - confirm that
        the configured target ends with a two-digit index.
        """
        target = self.data['__EVENTTARGET']
        self.data['__EVENTTARGET'] = target[:-2] + f'{page:02d}'

    def _get_pages(self, start_page):
        """Fetch pager entries ``start_page`` .. 12, one request per entry.

        Stops early as soon as ``get_items()`` reports (via ``need_more``)
        that there is nothing further to fetch.
        """
        for page in range(start_page, 13):
            self._select_page(page)
            self.get_items()
            if not self.need_more:
                break

    def parse(self):
        """Fetch all pages, then close the output file.

        The first pass starts at pager entry 1; later passes start at 3, as
        in the original.  The original looped with ``while True`` and had no
        exit; the loop now runs only while ``need_more`` is set.
        """
        try:
            self._get_pages(1)
            while self.need_more:
                self._get_pages(3)
        finally:
            self.close()

    def close(self):
        """Close the output file if it is still open."""
        if not self.file.closed:
            self.file.close()

    def _parse_title(self, title):
        """Extract ``(address, area, cadastre)`` from a lot title.

        Returns:
            ``None`` when none of the three patterns matches; otherwise a
            tuple in which each element is the extracted string, or ``None``
            for a pattern that did not match.
        """
        address = self.address.search(title)
        area = self.area.search(title)
        cadastre = self.cadastre.search(title)
        if not cadastre:
            # Fallback: dash/slash-separated number format.
            cadastre = search(r'\d{2}-\d{2}-\d{2}/\d{3}/\d{4}-\d+', title)
        if not (area or address or cadastre):
            return None
        return (
            address.group(1) if address else None,
            # Drop spaces and use ',' as the decimal separator.
            area.group(1).replace(' ', '').replace('.', ',') if area else None,
            cadastre.group(0) if cadastre else None,
        )

    def get_items(self):
        """Request the currently selected page and store its matching lots.

        Posts the form data, writes one JSON line per lot whose title yields
        an address, area or cadastral number, refreshes the form-state
        fields from the response and prints how many lots were written.
        Sets ``need_more`` to ``False`` when the page has no grid rows or the
        response carries no fresh form state.
        """
        counter = 0
        # A timeout keeps a stalled connection from blocking the run forever.
        resp = requests.post('http://www.bepspb.ru/', headers=self.headers,
                             data=self.data, timeout=30)
        soup = BeautifulSoup(resp.text, 'lxml')
        rows = soup.find_all('tr', class_='gridRow')
        for item in rows:
            # NOTE(review): the right-hand side of this unpacking was lost in
            # the paste; taking the stripped text of the row's six <td> cells
            # is a reconstruction - confirm.  A different cell count raises
            # ValueError on purpose so a wrong guess is not hidden.
            _, lot, price, end_date, status, _desc = [
                cell.text.strip() for cell in item.find_all('td')
            ]
            top = item.find('a', class_='tip-lot')
            title = top.text.strip().replace('\n', '').replace('\r', ';')
            link = top.get('href')
            _id = self._id.search(link).group(1)
            fields = self._parse_title(title)
            if fields is None:
                continue
            address, area, cadastre = fields
            json.dump({'_id': _id, 'lot': lot, 'link': link,
                       'price': price.replace(' ', ''), 'end_date': end_date,
                       'status': status, 'title': title, 'address': address,
                       'area': area, 'cadastre': cadastre},
                      self.file, ensure_ascii=False)
            self.file.write('\n')
            counter += 1
        self.file.flush()

        # NOTE(review): the paste showed '__CVIEWSTATE(+)' with the bracketed
        # parts stripped; a '|'-delimited value is assumed here - confirm
        # against a real response.
        view_state = search(r'__CVIEWSTATE[|]([^|]+)', resp.text)
        validation = search(r'__EVENTVALIDATION[|]([^|]+)', resp.text)
        if view_state and validation:
            self.data['__CVIEWSTATE'] = view_state.group(1)
            self.data['__EVENTVALIDATION'] = validation.group(1)
            self.need_more = bool(rows)
        else:
            # Without fresh form state the next request cannot page on.
            self.need_more = False
        print(f'Found: {counter}')
# Create the scraper; the constructor loads settings via _settings() and
# opens 'rez.json' in append mode.
a = GetAllBase()
class UrlOpener(FancyURLopener, object):
    """Opener subclass that overrides ``version`` with a Firefox 2 string.

    NOTE(review): ``FancyURLopener`` is not imported in the visible part of
    the file; it must be in scope (presumably from ``urllib.request`` -
    confirm) before this class statement runs.
    """

    # Replaces the base-class ``version`` attribute with a browser
    # identification string.
    version = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11'
# Compare the current flat price with the one stored in the old database.
# NOTE(review): oldFlatsDBFilePath, oldFlatsDBCursor, flatPageURL, flatAddress,
# flatWholeSquare, flatLivingSquare, flatKitchenSquare and flatPriceInfo are
# not defined in the visible part of the file; they must be set beforehand.
isFlatInfoUpdated = 0  # becomes 1 when the same flat exists in the old DB
flatPriceDelta = 0     # new price minus old price; stays 0 if not computable
if oldFlatsDBFilePath:
    oldFlatsDBCursor.execute('''SELECT flatPriceInfo FROM Flats WHERE flatPageURL = ? AND flatAddress = ? AND flatWholeSquare = ? AND flatLivingSquare = ? AND flatKitchenSquare = ?''', (flatPageURL, flatAddress, flatWholeSquare, flatLivingSquare, flatKitchenSquare,))
    oldFlatInfoRow = oldFlatsDBCursor.fetchone()
    # fetchone() returns one row (a sequence) or None.  The paste had lost
    # the [0] subscripts: the same None test appeared twice and float() was
    # applied to the row itself, which raises an uncaught TypeError.
    if oldFlatInfoRow is not None and oldFlatInfoRow[0] is not None:
        isFlatInfoUpdated = 1
        oldFlatPriceInfo = oldFlatInfoRow[0]
        try:
            flatPriceDelta = float(flatPriceInfo) - float(oldFlatPriceInfo)
        except ValueError:
            # Deliberate best effort: a non-numeric price leaves the delta 0.
            pass
# Start scraping: parse() calls _get_pages(1) once and then keeps calling
# _get_pages(3).
a.parse()