import requests
from bs4 import BeautifulSoup
def fetch_data(url, timeout=10):
    """Download *url* and return the cell text of its ``data-table`` table.

    Args:
        url: Address of the HTML page to scrape.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list with one entry per ``<tr>`` of the first
        ``<table class="data-table">`` found on the page. Each entry is the
        list of whitespace-stripped ``<td>`` texts of that row. Rows that
        contain no ``<td>`` cells (for example header rows built only from
        ``<th>``) yield an empty list.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx response.
        ValueError: If the page contains no ``data-table`` table.
    """
    # Without a timeout, requests can block forever on a stalled server.
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error page instead of trying to parse it as data.
    response.raise_for_status()

    # requests assumes ISO-8859-1 for text/* responses whose Content-Type
    # carries no charset, which garbles non-Latin pages. In that case fall
    # back to the encoding detected from the body; a declared charset is
    # left untouched.
    content_type = response.headers.get('Content-Type', '')
    if 'charset' not in content_type.lower():
        response.encoding = response.apparent_encoding

    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('table', {'class': 'data-table'})
    if table is None:
        # find() returns None when nothing matches; report that clearly
        # rather than failing later with an AttributeError on None.
        raise ValueError(
            "no table with class 'data-table' found at {}".format(url)
        )

    return [
        [cell.text.strip() for cell in row.find_all('td')]
        for row in table.find_all('tr')
    ]
# Page to scrape; fetch_data looks for a <table class="data-table"> on it.
url = "http://www.shanxi.gov/statistics"
# Runs the HTTP request as soon as this statement executes. `data` is a list
# of rows, each row being a list of cell strings.
data = fetch_data(url)
# Preview the first five scraped rows as a quick sanity check.
print(data[:5])
-- Storage for the statistics rows (year, region, population, GDP).
-- IF NOT EXISTS makes the statement safe to re-run against an existing schema.
-- NOTE: AUTO_INCREMENT is MySQL/MariaDB syntax; other engines spell it differently.
CREATE TABLE IF NOT EXISTS shanxi_statistics (
-- Surrogate key assigned by the database on insert.
id INT AUTO_INCREMENT PRIMARY KEY,
-- Year stored as text of up to 4 characters, not as a numeric or date type.
year VARCHAR(4),
-- Region name, up to 20 characters.
region VARCHAR(20),
population INT,
-- Fixed-point value: up to 10 digits in total, 2 of them after the decimal point.
-- NOTE(review): the unit is not stated here; the plotting code later in this
-- file labels the axis "billion CNY" -- confirm the stored unit matches.
gdp DECIMAL(10, 2)
);
import pandas as pd
import matplotlib.pyplot as plt
# Load the whole statistics table; `connection` must already be an open
# database connection defined before this point.
df = pd.read_sql("SELECT * FROM shanxi_statistics", connection)
# Convert the gdp column to plain floats before aggregating.
df['gdp'] = df['gdp'].astype(float)

# Total GDP per region, largest first.
gdp_by_region = df.groupby('region')['gdp'].sum()
grouped = gdp_by_region.sort_values(ascending=False)

# Series.plot returns the Axes it drew on, so label that Axes directly.
ax = grouped.plot(kind='bar')
ax.set_title('GDP Distribution Across Regions in Shanxi')
ax.set_xlabel('Region')
ax.set_ylabel('GDP (in billion CNY)')
plt.show()