
#1 July 17, 2022 17:00:51

olegshtompel

Website parser

Good evening! I have a Python script that parses forebet, predictz, windrawwin, soccervista, prosoccer and vitibet.
I want to add one more site to it: betodds24.

import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
import cloudscraper
# Default dict that will be used to store everything
predicts = {}
# Simple class just to simplify
class Game:
    def __init__(self):
        self.name = ''
        self.predict = ''
# Running every function that we have for scraping the predicts from each website
# To add more websites, you will need to add the function name on the function array
def main():
    functions = [forebet, predictz, windrawwin, soccervista, prosoccer, vitibet, footystats]
    for func in functions:
        func()
def forebet():
    global predicts
    # https://www.forebet.com/en/football-tips-and-predictions-for-today
    
    predicts['forebet'] = []
    url = "https://www.forebet.com/en/football-tips-and-predictions-for-today"
    page = requests.get(url, headers={"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36"})
    soup = BeautifulSoup(page.content, "html.parser")
    games = soup.find_all(class_="rcnt tr_1")
    for game in games:
        name = game.find("meta", {"itemprop":"name"})
        if name is None: 
            continue
        game_class = Game()
        game_class.name = name.attrs['content']
        game_class.predict = game.find('span', {"class": "forepr"}).text
        predicts['forebet'].append({'game': game_class.name, 'predict': game_class.predict})
def predictz():
    global predicts
    # https://www.predictz.com/predictions
    predicts['predictz'] = []
    scraper = cloudscraper.create_scraper()
    page = scraper.get("https://www.predictz.com/predictions")
    soup = BeautifulSoup(page.text, "html.parser")
    games = soup.find_all(class_='ptcnt')
    for game in games:
        if game.find("div", {"class": "ptmobh"}) is None: 
            continue
        home = game.find("div", {"class": "ptmobh"}).text
        away = game.find("div", {"class": "ptmoba"}).text
        if home == '' or away == '':
            continue
        game_class = Game()
        game_class.name = home + " vs " + away
        predict_text = game.find("div", {"class": "ptpredboxsml"}).text
        game_class.predict = '1' if 'Home' in predict_text else '2' if 'Away' in predict_text else 'X'
        predicts['predictz'].append({'game': game_class.name, 'predict': game_class.predict})
def windrawwin():
    global predicts
    # https://www.windrawwin.com/predictions/today
    predicts['windrawwin'] = []
    scraper = cloudscraper.create_scraper()
    page = scraper.get("https://www.windrawwin.com/predictions/today")
    soup = BeautifulSoup(page.content, "html.parser")
    games = soup.find_all(class_='wttr')
    for game in games:
        teams = game.find_all("div", {"class": "wtmoblnk"})
        game_class = Game()
        game_class.name = teams[0].text + " vs " + teams[1].text
        predict_text = game.find("div", {"class": "wtprd"}).text # Home 2-0
        game_class.predict = '1' if 'Home' in predict_text else '2' if 'Away' in predict_text else 'X'
        predicts['windrawwin'].append({'game': game_class.name, 'predict': game_class.predict})
def soccervista():
    global predicts
    # https://www.newsoccervista.com/
    predicts['soccervista'] = []
    url = "https://www.newsoccervista.com"
    page = requests.get(url, headers={"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36"})
    soup = BeautifulSoup(page.content, "html.parser")
    games = soup.find_all(class_='twom')
    for game in games:
        home = game.find("div", {"class": "hometeam"}).text
        away = game.find("div", {"class": "awayteam"}).text
        if home == '' or away == '':
            continue
        game_class = Game()
        game_class.name = home + " vs " + away
        game_class.predict = game.find("strong").text
        predicts['soccervista'].append({'game': game_class.name, 'predict': game_class.predict})
def prosoccer():
    global predicts
    # https://www.prosoccer.gr/en/football/predictions/
    predicts['prosoccer'] = []
    url = "https://www.prosoccer.gr/en/football/predictions"
    page = requests.get(url, headers={"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36"})
    soup = BeautifulSoup(page.content, "html.parser")
    games = soup.find_all('tr')
    for game in games:
        try:
            game_name = game.find("td", {"class": "mio"}).text.lower()
        except:
            continue
        if game_name is None: 
            continue
        game_class = Game()
        game_class.name = game_name.split('-')[0][:-1] + ' vs ' + game_name.split('-')[1][1:]
        
        predict = game.find("span", {"class": "sctip"}).text[1:]
        if '-' in predict:
            predict = predict.split('-')[0]
        game_class.predict = predict
        predicts['prosoccer'].append({'game': game_class.name, 'predict': game_class.predict})
def vitibet():
    global predicts
    # https://www.vitibet.com/index.php?clanek=quicktips&sekce=fotbal&lang=en
    predicts['vitibet'] = []
    url = "https://www.vitibet.com/index.php?clanek=quicktips&sekce=fotbal&lang=en"
    page = requests.get(url, headers={"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36"})
    soup = BeautifulSoup(page.content, "html.parser")
    games = soup.find_all('tr', class_=None)
    for game in games:
        try:
            game_name = game.find_all("td", {"class": "standardbunka"})
        except:
            continue
        if game_name is None or game_name == []: 
            continue
        game_class = Game()
        game_class.name = game_name[1].text + ' vs ' + game_name[2].text
        regex = re.compile('barvapodtipek.*')
        game_class.predict = game.find("td", {"class": regex}).text.replace('0', 'X')
        predicts['vitibet'].append({'game': game_class.name, 'predict': game_class.predict})
def footystats():
    global predicts
    # https://footystats.org/predictions/
    predicts['footystats'] = []
    url = "https://footystats.org/predictions/"
    page = requests.get(url, headers={"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36"})
    soup = BeautifulSoup(page.content, "html.parser")
    games = soup.find_all(class_='betHeaderTitle')
    for game in games:
        predict = game.find("span", {"class": "market"}).text.lower()
        game.find('span', class_="market").decompose()
        game_name = game.text.strip()
        if game_name == 'See More Football Predictions':
            continue
        game_class = Game()
        game_class.name = game_name
        game_class.predict = '1' if 'home win' in predict else '2' if 'away win' in predict else 'X' if 'draw' in predict else predict
        predicts['footystats'].append({'game': game_class.name, 'predict': game_class.predict})
main()
# Storing all the array names to filter the games and the predicts
to_filter = list(predicts.keys())
# Creating an empty array to store all the games from every website
predicts['games'] = ['']
# Group all the games from every website and tried to ignore existing ones
for arr in to_filter:
    for to_add_games in predicts[arr]:
        found = False
        for game in predicts['games']:
            game_teams = to_add_games['game'].split(' vs ')
            home_name = game_teams[0]
            away_name = game_teams[1]
            if home_name.lower() in game.lower() or away_name.lower() in game.lower():
                found = True
        if found == False:
            predicts['games'].append(to_add_games['game'])
# Match the predicts with the games from the websites
for arr in to_filter:
    predicts['predicts_' + arr] = []
    for game in predicts['games']:
        found = False
        for game_to_filter in predicts[arr]:
            game_teams = game_to_filter['game'].split(' vs ')
            home_name = game_teams[0]
            away_name = game_teams[1]
            if home_name.lower() in game.lower() or away_name.lower() in game.lower():
                predicts['predicts_' + arr].append(game_to_filter['predict'])
                found = True
                break
        if found == False:
            predicts['predicts_' + arr].append('')
# Creating the xlsx with the games and the predicts of every website and for each game
df = pd.DataFrame({
    'Games': predicts['games'], 
    'Forebet': predicts['predicts_forebet'], 
    'PredictZ': predicts['predicts_predictz'], 
    'WinDrawWin': predicts['predicts_windrawwin'], 
    'SoccerVista': predicts['predicts_soccervista'],
    'ProSoccer': predicts['predicts_prosoccer'],
    'Vitibet': predicts['predicts_vitibet'],
    'Footystats': predicts['predicts_footystats']
})
writer = pd.ExcelWriter('predicts.xlsx', engine='xlsxwriter')
df.to_excel(writer, sheet_name='Predicts')
# Simple stylings
writer.sheets['Predicts'].set_column('B:B', 50)
writer.sheets['Predicts'].set_column('E:E', 15)
writer.save()
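
A side note on the Excel part: newer pandas releases deprecate ExcelWriter.save() in favour of simply closing the writer, so on a current pandas version the end of the script can be written with a context manager instead. A small sketch of just the last lines, everything else unchanged:

# Equivalent ending for newer pandas, where writer.save() is deprecated;
# the with-block saves and closes the file automatically on exit.
with pd.ExcelWriter('predicts.xlsx', engine='xlsxwriter') as writer:
    df.to_excel(writer, sheet_name='Predicts')
    # Simple stylings
    writer.sheets['Predicts'].set_column('B:B', 50)
    writer.sheets['Predicts'].set_column('E:E', 15)
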
Here is what I tried, but something is not working. What am I doing wrong? Please advise.
def betodds24():
    global predicts
    # https://www.betodds24.com/
    
    predicts['betodds24'] = []
    scraper = cloudscraper.create_scraper()
    page = scraper.get("https://www.betodds24.com")
    
    soup = BeautifulSoup(page.content, "html.parser")
    games = soup.find_all(class_="tr")
    for game in games:
        home = game.find("tr").text
        avay = game.find("tr").text
        if home == '' or away == '':
            continue
            game_class = Game()
        game_class.name = home + " vs " + away
        game_class.predict = game.find("th").text
        predicts['betodds24'].append({'game': game_class.name, 'predict': game_class.predict})
        


#2 July 17, 2022 23:56:50

xam1816

I don't know exactly which data you need; I just did this as an exercise.

  
import requests
import pandas as pd
from bs4 import BeautifulSoup
def get_data_from_betodds24():
    out = []
    resp = requests.get(r'https://www.betodds24.com')
    if resp.ok:
        soup = BeautifulSoup(resp.content, 'html.parser')
        tables = [bl.find('div', class_="tab-pane fade active show")
                  for bl in soup.find_all('div', class_="tab-content", id="StatisticsTabs")]
        if tables:
            for t in tables:
                columns = [i.text for i in t.find_all('th') if i.text][:7]
                rows = [[td.text for td in tr.find_all('td') if td.text][:7] for tr in t.find('tbody').find_all('tr')]
                df = pd.DataFrame(rows, columns=columns)
                out.append(df)
        return out
res = get_data_from_betodds24()
for t in res:
    print(t)
    print('\n================\n')
Output:
 
    Time  Home                  Away                1   X   2  Tip
0  16:00  Union Santa Fe        CA Huracan         39  31  31   X1
1  16:30  El Gounah             Talaea El Gaish    30  34  37   X1
2  18:00  San Telmo             Sacachispas FC     45  29  26   X1
3  05:00  Heidelberg United     Hume City          50  25  25   X1
4  14:15  Flora Tallinn         Tallinna Kalev     94   5   0    1

================

    Time  Home                  Away                1   X   2  Tip
0  08:30  Slovan Bratislava B   Petrzalka          27  25  48   2X
1  13:00  AIK                   Kalmar FF          54  27  20   X1
2  06:30  Slovan Bratislava B   Petrzalka          26  23  51   2X
3  13:00  IK Sirius FK          Degerfors IF       51  24  26   1X
4  01:10  Deportivo La Equidad  Junior FC          39  31  29   X1

================

    Time  Home                  Away                1   X   2  Tip
0  16:00  Brøndby IF            AGF Aarhus         48  27  25   X1
1  16:30  La Serena             Antofagasta        40  28  31   X1
2  00:00  Charleston Battery    Hartford Athletic  32  25  43   2X
3  00:00  Orange County Blues   Miami              42  27  31   X1
4  00:00  Forward Madison FC    Tormenta FC        34  29  37   X1

================


Process finished with exit code 0
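
As an aside: when a page serves its data as plain HTML tables like this one, pandas.read_html can often pull all of them in a single call. A minimal sketch, assuming the page is rendered server-side, does not block the default user agent (otherwise the requests/cloudscraper approach above is still needed), and lxml or html5lib is installed:

import pandas as pd

# read_html parses every <table> element on the page and returns a list of DataFrames
tables = pd.read_html('https://www.betodds24.com')
for df in tables:
    print(df.head())
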


#3 July 19, 2022 11:53:36

olegshtompel

xam1816
I need to pull from this link https://www.betodds24.com/Football
the team names in the first and second columns, plus the third column, tip (the prediction).
   
   Games                              Forebet  PredictZ  WinDrawWin
0
1  Dandong Tengyue vs Hunan Xiangtao  1        1         X

This is what the table looks like in Excel; I want to swap the forebet column for betodds24.


#4 July 19, 2022 12:35:30

ZerG

olegshtompel
And what is stopping you?




#5 July 19, 2022 15:25:22

olegshtompel

Here is a picture, using the prosoccer site as an example.
The green highlighting marks what I managed to work out on my own, and the red highlighting marks what I could not… Is it realistic to explain this?


#6 July 20, 2022 12:15:38

xam1816

It is not clear what needs to go into “predict”.

  
def betodds24():
    global predicts
    predicts['betodds24'] = []
    resp = requests.get(r'https://www.betodds24.com/Football')
    if resp.ok:
        soup = BeautifulSoup(resp.content, 'html.parser')
        if table := soup.find('table', class_='table table-hover').find('tbody'):
            for tr in table.find_all('tr'):
                td = tr.find_all('td')
                predicts['betodds24'].append({'game': f'{td[2].text} vs {td[4].text}', 'predict': td[8].text})

Added to main():
  
def main():
    functions = [forebet, predictz, windrawwin, soccervista, prosoccer, vitibet, footystats, betodds24]
    for func in functions:
        func()
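
One step that is easy to miss: the DataFrame at the end of the original script lists every site's column explicitly, so it also needs an entry for the new site. A sketch based on the existing code, assuming the matching loop has produced predicts['predicts_betodds24'] the same way it does for the other sites:

df = pd.DataFrame({
    'Games': predicts['games'],
    'Betodds24': predicts['predicts_betodds24'],  # new column; add it or swap it in for 'Forebet'
    'PredictZ': predicts['predicts_predictz'],
    'WinDrawWin': predicts['predicts_windrawwin'],
    # ... keep the remaining site columns as in the original script
})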


#7 July 20, 2022 13:04:42

olegshtompel

xam1816
It is not clear what needs to go into “predict”.
Thank you so much, everything works! I want to try adding another 3-5 sites to this table. I will try to figure it out on my own; if I cannot, I would be very grateful if you could correct my mistakes.


#8 July 20, 2022 22:56:41

ZerG

I am correcting, as you asked:
Lutz




#9 July 22, 2022 11:46:09

olegshtompel

xam1816
It is not clear what needs to go into “predict”.
Here is one more site I want to add, but something is off. Help me out, friend.
def your1x2():
    global predicts
    predicts['your1x2'] = []
    resp = requests.get(r'https://www.your1x2.com/football')
    if resp.ok:
        soup = BeautifulSoup(resp.content, 'html.parser')
        if table := soup.find('table', class_='table table striped table-hover').find('tbody'):
            for tr in table.find_all('tr'):
                td = tr.find_all('td')
                predicts['your1x2'].append({'game': f'{td[2].text} vs {td[4].text}', 'predict': td[8].text})


#10 July 24, 2022 14:52:36

olegshtompel

Here is yet another variant, but unfortunately it does not work…

def your1x2():
    global predicts
    predicts['your1x2'] = []
    url = "https://www.your1x2.com/football"
    page = requests.get(url, headers={"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36"})
    soup = BeautifulSoup(page.content, "html.parser")
    games = soup.find_all('tr')
    
    for game in games:
        try:
            game_name = game.find("class_=table-hover").text
        except:
            continue
        if game_name is None: 
            continue
        game_class = Game()
        game_class.name = home + " vs " + away
        game_class.predict = game.find("th").text
        predicts['your1x2'].append({'game': game_class.name, 'predict': game_class.predict})
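
When a chain of find() calls like this comes back empty, it usually helps to check each step on its own before chaining them. A minimal diagnostic sketch (the class name below is a guess and has to be checked against the page's real markup in the browser's developer tools):

import requests
from bs4 import BeautifulSoup

resp = requests.get('https://www.your1x2.com/football',
                    headers={'user-agent': 'Mozilla/5.0'})
print(resp.status_code)  # 403/503 usually means the site blocks plain requests
soup = BeautifulSoup(resp.content, 'html.parser')
table = soup.find('table', class_='table-hover')  # guessed selector; confirm it in the page source
print(table is not None)  # False means the selector (or the response content) is wrong
if table is not None and table.find('tbody') is not None:
    for tr in table.find('tbody').find_all('tr'):
        print([td.text.strip() for td in tr.find_all('td')])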

