urls=[]for season in range(1992, 2024): urls.append(['https://fbref.com/en/comps/9/',int(season),'-',(int(season)+1),'/schedule/',int(season),'-',(int(season)+1),'-Premier-League-Scores-andFi...
# Build one fbref.com "Scores and Fixtures" URL per Premier League season.
# range(1992, 2024) yields start years 1992..2023, so the seasons run from
# 1992-1993 through 2023-2024 (32 URLs in total).
urls = [
    f'https://fbref.com/en/comps/9/{season}-{season + 1}/schedule/'
    f'{season}-{season + 1}-Premier-League-Scores-and-Fixtures'
    for season in range(1992, 2024)
]
# Keep the finished URLs in a Series (one full URL string per row) so they
# can be iterated or mapped over row by row.
urls_combined = pd.Series(urls, dtype=str)
urls_combined
https://fbref.com/en/comps/9/1992-1993/schedule/1992-1993-Premier-League-Scores-and-Fixtures
https://fbref.com/en/comps/9/1993-1994/schedule/1993-1994-Premier-League-Scores-and-Fixtures
https://fbref.com/en/comps/9/1994-1995/schedule/1994-1995-Premier-League-Scores-and-Fixtures
依此类推,大约 30 行。
我想逐行读取这些 URL,对每一行的 URL 调用 read_html,并用每次返回的结果分别创建一个新的数据框。
只读一个:
# Fetch a single season as a sanity check: pd.read_html returns a list of
# DataFrames, and the first one is kept as `df`.
url_df = 'https://fbref.com/en/comps/9/1992-1993/schedule/1992-1993-Premier-League-Scores-and-Fixtures'
tables = pd.read_html(url_df)
df = tables[0]
df
Wk Day Date Home Score Away Attendance Venue Referee
0 1.0 Sat 1992-08-15 Southampton 0–0 Tottenham 19654.0 The Dell Vic Callow
1 1.0 Sat 1992-08-15 Coventry City 2–1 Middlesbrough 12681.0 Highfield Road Howard King
2 1.0 Sat 1992-08-15 Sheffield Utd 2–1 Manchester Utd 28070.0 Bramall Lane Brian Hill
3 1.0 Sat 1992-08-15 Arsenal 2–4 Norwich City 24030.0 Highbury Alan Gunn
4 1.0 Sat 1992-08-15 Crystal Palace 3–3 Blackburn 17086.0 Selhurst Park Roger Milford
... ... ... ... ... ... ... ... ... ...
527 42.0 Sun 1993-05-09 QPR 2–1 Aston Villa 18904.0 Loftus Road Paul Durkin
528 NaN NaN NaN NaN NaN NaN NaN NaN NaN
529 37.0 Tue 1993-05-11 Arsenal 1–3 Tottenham 26393.0 Highbury Keith Cooper
530 NaN NaN NaN NaN NaN NaN NaN NaN NaN
531 40.0 Tue 1993-05-11 QPR 3–1 Sheffield Weds 12177.0 Loftus Road David Allison
532 rows × 9 columns