Форум сайта python.su
0
Добрый вечер! Есть скрипт на Питоне. Парсит forebet, predictz, windrawwin, soccervista, prosoccer, vitibet .
Хочу добавить туда еще сайт betodds24.
import re

import cloudscraper
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Default dict that will be used to store everything:
# site key -> list of {'game': "Home vs Away", 'predict': tip}
predicts = {}

# Browser-like header sent with every plain `requests` call
HEADERS = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36"}

# Seconds to wait for a site before giving up instead of hanging forever
REQUEST_TIMEOUT = 30

# Spreadsheet column title per site key; a site missing here uses its key
COLUMN_TITLES = {
    'forebet': 'Forebet',
    'predictz': 'PredictZ',
    'windrawwin': 'WinDrawWin',
    'soccervista': 'SoccerVista',
    'prosoccer': 'ProSoccer',
    'vitibet': 'Vitibet',
    'footystats': 'Footystats',
}


# Simple class just to simplify (kept so scrapers written against it still work)
class Game:
    def __init__(self):
        self.name = ''
        self.predict = ''


def _fetch_soup(url):
    """Download *url* with the browser-like headers and return the parsed page."""
    page = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    return BeautifulSoup(page.content, "html.parser")


def _text(node):
    """Return the text of a parsed tag, or '' when the tag was not found."""
    return node.text if node is not None else ''


def _store(site, name, predict):
    """Append one game/tip pair to the list kept for *site*."""
    predicts[site].append({'game': name, 'predict': predict})


def _home_away_draw(predict_text):
    """Map a 'Home ...' / 'Away ...' tip text to '1' / '2', anything else to 'X'."""
    if 'Home' in predict_text:
        return '1'
    if 'Away' in predict_text:
        return '2'
    return 'X'


# Running every function that we have for scraping the predicts from each website
# To add more websites, you will need to add the function name on the function array
def main():
    """Run every site scraper once, in the order the columns should appear."""
    functions = [forebet, predictz, windrawwin, soccervista, prosoccer, vitibet, footystats]
    for func in functions:
        func()


def forebet():
    """Collect tips from forebet.com into ``predicts['forebet']``."""
    # https://www.forebet.com/en/football-tips-and-predictions-for-today
    predicts['forebet'] = []
    soup = _fetch_soup("https://www.forebet.com/en/football-tips-and-predictions-for-today")
    for game in soup.find_all(class_="rcnt tr_1"):
        name = game.find("meta", {"itemprop": "name"})
        tip = game.find('span', {"class": "forepr"})
        # Rows without a name or a tip cannot be reported; skip them
        if name is None or tip is None or not name.get('content'):
            continue
        _store('forebet', name.get('content'), tip.text)


def predictz():
    """Collect tips from predictz.com into ``predicts['predictz']``."""
    # https://www.predictz.com/predictions
    predicts['predictz'] = []
    scraper = cloudscraper.create_scraper()
    page = scraper.get("https://www.predictz.com/predictions", timeout=REQUEST_TIMEOUT)
    soup = BeautifulSoup(page.text, "html.parser")
    for game in soup.find_all(class_='ptcnt'):
        home = _text(game.find("div", {"class": "ptmobh"}))
        away = _text(game.find("div", {"class": "ptmoba"}))
        tip = game.find("div", {"class": "ptpredboxsml"})
        if home == '' or away == '' or tip is None:
            continue
        _store('predictz', home + " vs " + away, _home_away_draw(tip.text))


def windrawwin():
    """Collect tips from windrawwin.com into ``predicts['windrawwin']``."""
    # https://www.windrawwin.com/predictions/today
    predicts['windrawwin'] = []
    scraper = cloudscraper.create_scraper()
    page = scraper.get("https://www.windrawwin.com/predictions/today", timeout=REQUEST_TIMEOUT)
    soup = BeautifulSoup(page.content, "html.parser")
    for game in soup.find_all(class_='wttr'):
        teams = game.find_all("div", {"class": "wtmoblnk"})
        tip = game.find("div", {"class": "wtprd"})  # e.g. "Home 2-0"
        if len(teams) < 2 or tip is None:
            continue
        _store('windrawwin', teams[0].text + " vs " + teams[1].text, _home_away_draw(tip.text))


def soccervista():
    """Collect tips from newsoccervista.com into ``predicts['soccervista']``."""
    # https://www.newsoccervista.com/
    predicts['soccervista'] = []
    soup = _fetch_soup("https://www.newsoccervista.com")
    for game in soup.find_all(class_='twom'):
        home = _text(game.find("div", {"class": "hometeam"}))
        away = _text(game.find("div", {"class": "awayteam"}))
        tip = game.find("strong")
        if home == '' or away == '' or tip is None:
            continue
        _store('soccervista', home + " vs " + away, tip.text)


def prosoccer():
    """Collect tips from prosoccer.gr into ``predicts['prosoccer']``."""
    # https://www.prosoccer.gr/en/football/predictions/
    predicts['prosoccer'] = []
    soup = _fetch_soup("https://www.prosoccer.gr/en/football/predictions")
    for game in soup.find_all('tr'):
        name_cell = game.find("td", {"class": "mio"})
        tip = game.find("span", {"class": "sctip"})
        if name_cell is None or tip is None:
            continue
        # The cell reads "home - away": drop the character next to the dash
        parts = name_cell.text.lower().split('-')
        if len(parts) < 2:
            continue
        predict = tip.text[1:]
        if '-' in predict:
            predict = predict.split('-')[0]
        _store('prosoccer', parts[0][:-1] + ' vs ' + parts[1][1:], predict)


def vitibet():
    """Collect tips from vitibet.com into ``predicts['vitibet']``."""
    # https://www.vitibet.com/index.php?clanek=quicktips&sekce=fotbal&lang=en
    predicts['vitibet'] = []
    soup = _fetch_soup("https://www.vitibet.com/index.php?clanek=quicktips&sekce=fotbal&lang=en")
    tip_class = re.compile('barvapodtipek.*')  # compiled once, not per row
    for game in soup.find_all('tr', class_=None):
        cells = game.find_all("td", {"class": "standardbunka"})
        tip = game.find("td", {"class": tip_class})
        if len(cells) < 3 or tip is None:
            continue
        # The site writes a draw as 0; the spreadsheet uses X
        _store('vitibet', cells[1].text + ' vs ' + cells[2].text, tip.text.replace('0', 'X'))


def footystats():
    """Collect tips from footystats.org into ``predicts['footystats']``."""
    # https://footystats.org/predictions/
    predicts['footystats'] = []
    soup = _fetch_soup("https://footystats.org/predictions/")
    for game in soup.find_all(class_='betHeaderTitle'):
        market = game.find("span", {"class": "market"})
        if market is None:
            continue
        predict = market.text.lower()
        # Remove the market label so only the match name is left in the title
        market.decompose()
        game_name = game.text.strip()
        if game_name == 'See More Football Predictions':
            continue
        if 'home win' in predict:
            predict = '1'
        elif 'away win' in predict:
            predict = '2'
        elif 'draw' in predict:
            predict = 'X'
        _store('footystats', game_name, predict)


def _split_teams(name):
    """Return the lower-cased (home, away) pair of a "Home vs Away" name.

    Returns None when the name does not have that shape. A blank team name
    is rejected too, because '' is a substring of every game name and would
    match everything.
    """
    parts = name.split(' vs ')
    if len(parts) < 2 or not parts[0].strip() or not parts[1].strip():
        return None
    return parts[0].lower(), parts[1].lower()


def _same_game(teams, game):
    """Tell whether either team of *teams* occurs in the game name *game*."""
    home, away = teams
    game = game.lower()
    return home in game or away in game


def collect_games(sites):
    """Group all the games from every website, ignoring already listed ones."""
    games = []
    for site in sites:
        for entry in predicts[site]:
            teams = _split_teams(entry['game'])
            if teams is None:
                continue
            if not any(_same_game(teams, game) for game in games):
                games.append(entry['game'])
    return games


def match_predicts(site, games):
    """Return the tip of *site* for every game in *games* ('' when it has none)."""
    entries = []
    for entry in predicts[site]:
        teams = _split_teams(entry['game'])
        if teams is not None:
            entries.append((teams, entry['predict']))

    column = []
    for game in games:
        for teams, predict in entries:
            if _same_game(teams, game):
                column.append(predict)
                break
        else:
            column.append('')
    return column


def build_table():
    """Build the games/predicts table with one column per scraped website."""
    sites = list(predicts)
    games = collect_games(sites)
    data = {'Games': games}
    for site in sites:
        data[COLUMN_TITLES.get(site, site)] = match_predicts(site, games)
    return pd.DataFrame(data)


def save_table(df, path='predicts.xlsx'):
    """Write *df* to the xlsx file at *path* with simple column stylings."""
    # The context manager saves and closes the workbook; ExcelWriter.save()
    # no longer exists in pandas 2.x
    with pd.ExcelWriter(path, engine='xlsxwriter') as writer:
        df.to_excel(writer, sheet_name='Predicts')
        sheet = writer.sheets['Predicts']
        sheet.set_column('B:B', 50)
        sheet.set_column('E:E', 15)


if __name__ == "__main__":
    main()
    save_table(build_table())
def betodds24():
    """Collect tips from the betodds24.com front page into ``predicts['betodds24']``."""
    # https://www.betodds24.com/
    predicts['betodds24'] = []
    scraper = cloudscraper.create_scraper()
    page = scraper.get("https://www.betodds24.com")
    soup = BeautifulSoup(page.content, "html.parser")
    # Rows are <tr> tags; find_all(class_="tr") would look for a CSS class
    # named "tr" instead and match nothing.
    for game in soup.find_all("tr"):
        cells = [td.text for td in game.find_all("td") if td.text]
        # Assumes the non-empty cells of a match row are
        # Time, Home, Away, 1, X, 2, Tip — TODO confirm against the live markup.
        if len(cells) < 7:
            continue
        game_class = Game()
        game_class.name = cells[1] + " vs " + cells[2]
        game_class.predict = cells[6]
        predicts['betodds24'].append({'game': game_class.name, 'predict': game_class.predict})
Отредактировано olegshtompel (Июль 17, 2022 17:04:54)
Офлайн
124
Не знаю, какие данные вам нужны, я просто потренировался.
import pandas as pd
import requests
from bs4 import BeautifulSoup


def get_data_from_betodds24():
    """Fetch the betodds24.com front page and return its statistics tables.

    Returns a list with one DataFrame per "StatisticsTabs" block whose active
    tab pane holds a table body. Each frame keeps the first seven non-empty
    header cells as columns and the first seven non-empty cells of each row.
    The list is empty when the request does not succeed.
    """
    out = []
    resp = requests.get(r'https://www.betodds24.com', timeout=30)
    if not resp.ok:
        return out

    soup = BeautifulSoup(resp.content, 'html.parser')
    for block in soup.find_all('div', class_="tab-content", id="StatisticsTabs"):
        pane = block.find('div', class_="tab-pane fade active show")
        # find() returns None when a block has no active pane or table body;
        # skip such blocks instead of failing on a None attribute access
        body = pane.find('tbody') if pane is not None else None
        if body is None:
            continue
        columns = [th.text for th in pane.find_all('th') if th.text][:7]
        rows = [[td.text for td in tr.find_all('td') if td.text][:7]
                for tr in body.find_all('tr')]
        out.append(pd.DataFrame(rows, columns=columns))
    return out


if __name__ == "__main__":
    res = get_data_from_betodds24()
    for t in res:
        print(t)
        print('\n================\n')
Time Home Away 1 X 2 Tip
0 16:00 Union Santa Fe CA Huracan 39 31 31 X1
1 16:30 El Gounah Talaea El Gaish 30 34 37 X1
2 18:00 San Telmo Sacachispas FC 45 29 26 X1
3 05:00 Heidelberg United Hume City 50 25 25 X1
4 14:15 Flora Tallinn Tallinna Kalev 94 5 0 1
================
Time Home Away 1 X 2 Tip
0 08:30 Slovan Bratislava B Petrzalka 27 25 48 2X
1 13:00 AIK Kalmar FF 54 27 20 X1
2 06:30 Slovan Bratislava B Petrzalka 26 23 51 2X
3 13:00 IK Sirius FK Degerfors IF 51 24 26 1X
4 01:10 Deportivo La Equidad Junior FC 39 31 29 X1
================
Time Home Away 1 X 2 Tip
0 16:00 Brøndby IF AGF Aarhus 48 27 25 X1
1 16:30 La Serena Antofagasta 40 28 31 X1
2 00:00 Charleston Battery Hartford Athletic 32 25 43 2X
3 00:00 Orange County Blues Miami 42 27 31 X1
4 00:00 Forward Madison FC Tormenta FC 34 29 37 X1
================
Process finished with exit code 0
Офлайн
0
xam1816мне нужно с этой ссылки https://www.betodds24.com/Football
Games Forebet PredictZ WinDrawWin 0 1 Dandong Tengyue vs Hunan Xiangtao 1 1 X
Отредактировано FishHook (Июль 19, 2022 12:42:33)
Офлайн
61
olegshtompelИ что вам мешает?
Офлайн
0
Вот картинка, на примере сайта prosoccer
Зелёным выделено то, что я сам смог разобрать, а красной заливкой — то, что не смог… Реально ли это объяснить?
Отредактировано olegshtompel (Июль 19, 2022 15:27:32)
Офлайн
124
не понятно что нужно вставить в “predict”
def betodds24():
    """Collect tips from betodds24.com/Football into ``predicts['betodds24']``.

    Each stored entry is ``{'game': "<home> vs <away>", 'predict': <tip>}``,
    read from cells 2, 4 and 8 of every row of the ``table table-hover``
    table. Nothing is stored when the request fails or the table is missing.
    """
    predicts['betodds24'] = []
    resp = requests.get(r'https://www.betodds24.com/Football', timeout=30)
    if not resp.ok:
        return

    soup = BeautifulSoup(resp.content, 'html.parser')
    table = soup.find('table', class_='table table-hover')
    # find() returns None when the markup is absent; check before chaining
    body = table.find('tbody') if table is not None else None
    if body is None:
        return

    for tr in body.find_all('tr'):
        td = tr.find_all('td')
        # Rows with fewer than nine cells have no tip cell to read
        if len(td) < 9:
            continue
        predicts['betodds24'].append({'game': f'{td[2].text} vs {td[4].text}', 'predict': td[8].text})
def main():
    """Run every site scraper once, in the order listed."""
    scrapers = (
        forebet,
        predictz,
        windrawwin,
        soccervista,
        prosoccer,
        vitibet,
        footystats,
        betodds24,
    )
    for scrape in scrapers:
        scrape()
Офлайн
0
xam1816, огромное спасибо, всё работает! Хочу попробовать добавить в эту табличку ещё 3–5 сайтов. Попробую разобраться сам; если не смогу, буду очень благодарен, если подкорректируете ошибки.
не понятно что нужно вставить в “predict”
Офлайн
61
rкорректирую как вы просили
Лутц
Офлайн
0
xam1816, вот ещё один сайт хочу добавить, но что-то не то. Друг, помоги.
не понятно что нужно вставить в “predict”
def your1x2():
    """Collect tips from your1x2.com/football into ``predicts['your1x2']``.

    Each stored entry is ``{'game': "<home> vs <away>", 'predict': <tip>}``.
    Nothing is stored when the request fails or no matching table is found.
    """
    predicts['your1x2'] = []
    resp = requests.get(r'https://www.your1x2.com/football', timeout=30)
    if not resp.ok:
        return

    soup = BeautifulSoup(resp.content, 'html.parser')
    # A multi-class string only matches an identical class attribute, so look
    # the table up by the single class "table-hover" instead.
    table = soup.find('table', class_='table-hover')
    body = table.find('tbody') if table is not None else None
    if body is None:
        return

    for tr in body.find_all('tr'):
        td = tr.find_all('td')
        # NOTE(review): cell positions 2 / 4 / 8 (home, away, tip) are carried
        # over from the betodds24 scraper — confirm against this site's table.
        if len(td) < 9:
            continue
        predicts['your1x2'].append({'game': f'{td[2].text} vs {td[4].text}', 'predict': td[8].text})
Офлайн
0
Вот еще такой вариант, но не работает увы…
def your1x2():
    """Collect tips from your1x2.com/football into ``predicts['your1x2']``.

    Each stored entry is ``{'game': "<home> vs <away>", 'predict': <tip>}``.
    """
    predicts['your1x2'] = []
    url = "https://www.your1x2.com/football"
    page = requests.get(url, headers={"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36"}, timeout=30)
    soup = BeautifulSoup(page.content, "html.parser")
    for game in soup.find_all('tr'):
        # Data sits in the row's <td> cells. find("class_=table-hover") would
        # search for a tag with that literal name and never match.
        cells = game.find_all('td')
        # NOTE(review): cell positions 2 / 4 / 8 (home, away, tip) are assumed
        # to match the betodds24 table layout — confirm against this site.
        if len(cells) < 9:
            continue
        home = cells[2].text
        away = cells[4].text
        if home == '' or away == '':
            continue
        game_class = Game()
        game_class.name = home + " vs " + away
        game_class.predict = cells[8].text
        predicts['your1x2'].append({'game': game_class.name, 'predict': game_class.predict})
Офлайн