1
2import pandas as pd
3import matplotlib.pyplot as plt
4from google.colab import files
5
6
# Ask the user for the input CSV through the Colab upload widget.
uploaded = files.upload()

# Read the file that was actually uploaded instead of always assuming a
# fixed name. The expected name is still preferred when present, and is
# used as a fallback when nothing was uploaded (e.g. the file is already
# in the working directory from an earlier run).
# NOTE(review): relies on files.upload() returning a dict keyed by the
# saved file name, with the files written to the working directory --
# confirm against the Colab docs.
default_csv = "classified_companies.csv"
if default_csv in uploaded or not uploaded:
    csv_name = default_csv
else:
    csv_name = next(iter(uploaded))

df = pd.read_csv(csv_name)
11
12
# Normalise header names so columns can be addressed in lower case
# regardless of how the CSV spelled them.
df.columns = df.columns.str.strip().str.lower()

# Normalise the text columns used by the filters below. Every column is
# cast to str first: a missing value then becomes the string 'nan'
# instead of NaN, so the later `.str.contains(...)` calls always return
# a pure boolean mask (a NaN in the mask makes boolean row selection
# raise), and the `.str` accessor works even if a column was parsed as
# non-text.
for text_col in ('location', 'sub_classification', 'company type'):
    df[text_col] = df[text_col].astype(str).str.strip().str.lower()
17
18
# Step 1: keep only the rows whose location text mentions Sydney.
in_sydney = df['location'].str.contains("sydney")
sydney_df = df[in_sydney]

# Step 2: of those, keep the rows whose sub-classification matches any
# of the data-role keywords. The keywords are OR-ed together into one
# regular-expression pattern.
keywords = ['data', 'analyst', 'scientist', 'engineer']
role_pattern = '|'.join(keywords)
is_data_role = sydney_df['sub_classification'].str.contains(role_pattern)
data_roles_df = sydney_df[is_data_role]
24
25
# Count the filtered rows per company type, relabel the two raw values
# with display names, and fix the order of the result to
# [direct, recruiter]. Any other company-type value is dropped by the
# reindex. `fill_value=0` makes a company type with no matching rows
# show up as an integer 0 instead of NaN (which would also turn the
# whole Series into floats).
company_type_counts = (
    data_roles_df['company type']
    .value_counts()
    .rename({
        'recruiter': 'Recruitment Company',
        'direct': 'Direct Hiring Company',
    })
    .reindex(['Direct Hiring Company', 'Recruitment Company'], fill_value=0)
)
33
34
# One colour per wedge, in the same order as company_type_counts.
colors = ['#ff69b4', '#ffb6c1']

# A pie chart needs a positive total to turn counts into wedge
# fractions; when nothing matched the filters (all counts zero or
# missing) skip the plot and say so instead of failing inside
# matplotlib.
if company_type_counts.sum() > 0:
    plt.figure(figsize=(6, 6))
    company_type_counts.plot(
        kind='pie',
        autopct='%1.1f%%',        # label each wedge with its percentage
        startangle=90,
        colors=colors,
        explode=[0.05, 0.05],     # pull both wedges slightly apart
        shadow=True,
        wedgeprops={'edgecolor': 'black'},
    )
    plt.title("Data Roles in Sydney by Company Type", fontsize=14)
    plt.ylabel("")                # hide the axis label pandas adds by default
    plt.tight_layout()
    plt.show()
else:
    print("No matching data roles in Sydney were found; nothing to plot.")