# Plot chart for Data Jobs in Sydney:
# Data Jobs in Sydney
#
# Colab notebook cell: upload classified_companies.csv, keep the Sydney rows
# whose sub_classification looks data-related, and draw a bar chart of the
# number of jobs per sub-classification.
import re

import pandas as pd

# Keywords that mark a sub-classification as data-related.
# You can adjust the keywords as needed.
data_keywords = [
    'data',
    'analytics',
    'machine learning',
    'ai',
    'artificial intelligence',
    'ml',
    'big data',
]


def build_keyword_pattern(keywords, whole_word_max_len=3):
    """Return a regex alternation matching any of *keywords*.

    Every keyword is escaped so it matches literally.  Keywords of at most
    *whole_word_max_len* characters (such as 'ai' and 'ml') are wrapped in
    word boundaries: matched as raw substrings they would also hit
    unrelated text such as "Retail", "Maintenance" or "HTML".  Longer
    keywords stay substring matches, so 'data' still matches "Database".
    """
    parts = []
    for keyword in keywords:
        escaped = re.escape(keyword)
        if len(keyword) <= whole_word_max_len:
            escaped = rf'\b{escaped}\b'
        parts.append(escaped)
    return '|'.join(parts)


def filter_data_jobs(jobs, keywords=None):
    """Return the rows of *jobs* whose sub_classification is data-related.

    *jobs* is a DataFrame with a 'sub_classification' column; *keywords*
    defaults to the module-level ``data_keywords``.  Matching ignores case
    and rows with a missing sub_classification are dropped.
    """
    if keywords is None:
        keywords = data_keywords
    is_data_job = jobs['sub_classification'].str.contains(
        build_keyword_pattern(keywords), case=False, na=False
    )
    return jobs[is_data_job]


if __name__ == "__main__":
    # Plotting and Colab imports live under the guard so the helpers above
    # can be imported without matplotlib or Colab being installed.
    import matplotlib.pyplot as plt
    from google.colab import files

    uploaded = files.upload()
    df = pd.read_csv('classified_companies.csv')

    # Filter for Sydney only (case-insensitive match)
    df_sydney = df[df['location'].str.contains('sydney', case=False, na=False)]

    # Filter for data jobs using sub_classification column
    df_data_jobs = filter_data_jobs(df_sydney)

    # Count of each sub-classification among data jobs
    data_job_counts = df_data_jobs['sub_classification'].value_counts()

    # Plotting
    if data_job_counts.empty:
        # Plotting an empty Series raises inside pandas; report it instead.
        print('No data jobs found for Sydney.')
    else:
        plt.figure(figsize=(10, 6))
        data_job_counts.plot(kind='bar', color='#ff69b4')  # pinkish color
        plt.title('Data Jobs in Sydney', fontsize=14)
        plt.xlabel('Sub-classification')
        plt.ylabel('Number of Jobs')
        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        plt.grid(axis='y', linestyle='--', alpha=0.7)
        plt.show()

# Plot chart for Data job distribution across Australian cities:
# Data job distribution across Australian cities
#
# Colab notebook cell: upload classified_companies.csv, keep the data-related
# jobs, derive a city from each location and draw a stacked bar chart of
# sub-classification counts for the cities with the most data jobs.
import re

import pandas as pd

# Keywords that mark a sub-classification as data-related.
data_keywords = [
    'data',
    'analytics',
    'machine learning',
    'ai',
    'artificial intelligence',
    'ml',
    'big data',
]

# Pink/violet-only shades used for the stacked bars.
color_palette = [
    '#7B3294',  # deep violet
    '#C51B7D',  # dark pink
    '#DE77AE',  # medium pink
    '#F1B6DA',  # pastel pink
    '#E781BF',  # rosy pink
    '#B03060',  # raspberry
    '#D36BA4',  # orchid
    '#CC6699',  # dusty rose
    '#FF66CC',  # bright pink
    '#993366',  # wine pink
]


def build_keyword_pattern(keywords, whole_word_max_len=3):
    """Return a regex alternation matching any of *keywords*.

    Every keyword is escaped so it matches literally.  Keywords of at most
    *whole_word_max_len* characters (such as 'ai' and 'ml') are wrapped in
    word boundaries: matched as raw substrings they would also hit
    unrelated text such as "Retail", "Maintenance" or "HTML".  Longer
    keywords stay substring matches, so 'data' still matches "Database".
    """
    parts = []
    for keyword in keywords:
        escaped = re.escape(keyword)
        if len(keyword) <= whole_word_max_len:
            escaped = rf'\b{escaped}\b'
        parts.append(escaped)
    return '|'.join(parts)


def select_data_jobs(jobs, keywords=None):
    """Return the data-related rows of *jobs* with an added 'city' column.

    Rows with a missing location are dropped first (Step 2), then rows are
    kept when their sub_classification matches one of *keywords* (Step 3,
    defaulting to ``data_keywords``).  The city (Step 4) is the leading run
    of letters and spaces in the location, with surrounding whitespace
    removed so "Sydney 2000" and "Sydney, NSW" both give "Sydney".

    The result is a copy; *jobs* itself is not modified.
    """
    if keywords is None:
        keywords = data_keywords
    located = jobs[jobs['location'].notna()]
    is_data_job = located['sub_classification'].str.contains(
        build_keyword_pattern(keywords), case=False, na=False
    )
    # .copy() so adding the column below does not write into a slice of the
    # caller's frame (which triggers SettingWithCopyWarning).
    df_data = located[is_data_job].copy()
    df_data['city'] = (
        df_data['location']
        .str.extract(r'(^[A-Za-z ]+)', expand=False)
        .str.strip()
    )
    return df_data


def city_breakdown(df_data, top_n=10):
    """Return a city x sub_classification table of job counts.

    *df_data* needs 'city' and 'sub_classification' columns.  Only the
    *top_n* cities with the most rows are kept (Step 5 plus the optional
    top-N limit); combinations that never occur are filled with 0.
    """
    grouped = (
        df_data.groupby(['city', 'sub_classification'])
        .size()
        .unstack(fill_value=0)
    )
    top_cities = df_data['city'].value_counts().head(top_n).index
    return grouped.loc[grouped.index.isin(top_cities)]


if __name__ == "__main__":
    # Plotting and Colab imports live under the guard so the helpers above
    # can be imported without matplotlib or Colab being installed.
    import matplotlib.pyplot as plt
    from google.colab import files

    uploaded = files.upload()
    df = pd.read_csv('classified_companies.csv')

    # Steps 2-4: valid locations, data-related jobs, city extraction
    df_data = select_data_jobs(df)

    # Step 5: group by city and sub-classification, limited to top 10 cities
    grouped = city_breakdown(df_data, top_n=10)

    # Step 6: plot using pink/violet-only shades
    if grouped.empty:
        # Plotting an empty frame raises inside pandas; report it instead.
        print('No data jobs found to plot.')
    else:
        # Draw on an explicit axes: DataFrame.plot opens its own figure when
        # no ax is given, which would leave a blank extra figure behind.
        fig, ax = plt.subplots(figsize=(18, 10))
        grouped.plot(
            kind='bar',
            stacked=True,
            color=color_palette,
            edgecolor='black',
            ax=ax,
        )

        plt.title('Data Job Distribution Across Australian Cities', fontsize=20)
        plt.xlabel('City', fontsize=14)
        plt.ylabel('Number of Jobs', fontsize=14)
        plt.xticks(rotation=45, ha='right', fontsize=12)
        plt.yticks(fontsize=12)
        plt.legend(
            title='Sub-classification',
            bbox_to_anchor=(1.02, 1),
            loc='upper left',
            fontsize=10,
        )
        plt.tight_layout()
        plt.grid(axis='y', linestyle='--', alpha=0.6)
        plt.show()