Plot charts for Job classification by Industry Sector (Source code)

Plot chart for Jobs in Sydney by Industry sector:

Python

# Step 1: Upload your file
from google.colab import files
uploaded = files.upload()

# Step 2: Imports
import io

import pandas as pd
import matplotlib.pyplot as plt

# Step 3: Load CSV
# files.upload() returns a {filename: file bytes} mapping. Read the file that
# was actually uploaded so a differently named (or renamed re-upload) file is
# not silently ignored; fall back to the expected name if nothing was uploaded.
if uploaded:
    uploaded_name = next(iter(uploaded))
    df = pd.read_csv(io.BytesIO(uploaded[uploaded_name]))
else:
    df = pd.read_csv('classified_jobs_updated.csv')

# Step 4: Normalize columns
df.columns = [col.strip().lower() for col in df.columns]

# Step 5: Filter for Sydney jobs (case-insensitive search)
# Search the location/city column when one exists: scanning every cell of
# every row is slow and also matches rows that merely mention Sydney somewhere
# else (e.g. in a description).
location_column = next(
    (col for col in df.columns if 'location' in col or 'city' in col),
    None,
)
if location_column is not None:
    is_sydney = df[location_column].astype(str).str.contains(
        'sydney', case=False, regex=False
    )
else:
    # No recognizable location column: fall back to searching every column.
    is_sydney = df.astype(str).apply(
        lambda column: column.str.contains('sydney', case=False, regex=False)
    ).any(axis=1)
sydney_df = df[is_sydney].copy()

# Step 6: Use cleaned final categories
categories = [
    "Banking", "Insurance", "Financial Services", "Healthcare", "Medical",
    "Education", "Defence", "Federal Government", "State Government",
    "Retail", "Hospitality", "Food", "FMCG", "Tourism", "Aviation",
    "Consulting", "Technology", "Telco", "Local Government",
    "Mining", "Energy", "Engineering", "Construction", "Legal", "Real Estate",
    "Marketing", "Sales", "IT Support", "Data Science", "DevOps", "Cybersecurity",
    "Project Management", "HR", "Others"
]

# Step 7: Count final_category values in Sydney
# reindex() keeps only the listed categories (in list order) and reports 0 for
# categories with no Sydney rows; labels outside the list are not charted.
category_counts = (
    sydney_df['final_category']
    .value_counts()
    .reindex(categories, fill_value=0)
)

# Step 8: Plotting (Horizontal Bar Chart)
plt.figure(figsize=(12, 10))
category_counts.sort_values().plot(kind='barh', color='hotpink', edgecolor='black')

plt.title('Number of Jobs in Sydney by Industry Sector', fontsize=16)
plt.xlabel('Number of Jobs')
plt.ylabel('Industry Sector')
plt.grid(axis='x', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()

Plot charts for Jobs in each city by Industry sector:

Python

# Step 1: Upload updated file
from google.colab import files
uploaded = files.upload()

# Step 2: Imports
import io
import re

import pandas as pd
import matplotlib.pyplot as plt

# Step 3: Load the cleaned dataset
# files.upload() returns a {filename: file bytes} mapping. Read the file that
# was actually uploaded so a differently named (or renamed re-upload) file is
# not silently ignored; fall back to the expected name if nothing was uploaded.
if uploaded:
    uploaded_name = next(iter(uploaded))
    df = pd.read_csv(io.BytesIO(uploaded[uploaded_name]))
else:
    df = pd.read_csv('classified_jobs_updated.csv')

# Step 4: Normalize column names
df.columns = [col.strip().lower() for col in df.columns]

# Step 5: Use the updated category list
categories = [
    "Banking", "Insurance", "Financial Services", "Healthcare", "Medical",
    "Education", "Defence", "Federal Government", "State Government",
    "Retail", "Hospitality", "Food", "FMCG", "Tourism", "Aviation",
    "Consulting", "Technology", "Telco", "Local Government",
    "Mining", "Energy", "Engineering", "Construction", "Legal", "Real Estate",
    "Marketing", "Sales", "IT Support", "Data Science", "DevOps", "Cybersecurity",
    "Project Management", "HR", "Others"
]

# Step 6: Detect the location column
# The first column whose (lower-cased) name contains "location" or "city" wins.
location_column = next(
    (col for col in df.columns if 'location' in col or 'city' in col),
    None,
)

if location_column is None:
    raise ValueError("Couldn't find a location or city column.")

# Step 7: Extract normalized city name
def extract_city(text):
    """Return the leading run of letters and spaces in *text*, title-cased.

    The value is lower-cased and the first run of ``a-z``/space characters is
    taken, so anything after the first digit or punctuation mark is dropped:
    ``"Melbourne, VIC"`` gives ``"Melbourne"``. A state abbreviation that is
    separated only by a space stays in the result and is title-cased too
    (``"Sydney NSW 2000"`` gives ``"Sydney Nsw"``).

    Returns ``"Unknown"`` when *text* is missing (``None`` or NaN) or when no
    letters are found.
    """
    # NaN is the only float that is not equal to itself; without this guard a
    # missing location would be reported as a city named "Nan" (or "None").
    if text is None or (isinstance(text, float) and text != text):
        return "Unknown"

    match = re.search(r'\b([a-z ]+)', str(text).lower())
    # The pattern can match whitespace alone (e.g. "123 "); treat that as no city.
    city = match.group(1).strip() if match else ""
    return city.title() if city else "Unknown"
# Derive one city label per row from the detected location column.
df['city'] = df[location_column].apply(extract_city)

# Step 8: Plot horizontal bar chart per city
# value_counts() orders the labels from most to fewest rows, so the charts
# are produced in that order.
cities = df['city'].value_counts().index.tolist()

for city in cities:
    city_df = df[df['city'] == city]
    # Keep only the listed categories, in list order, with 0 for absent ones.
    category_counts = (
        city_df['final_category']
        .value_counts()
        .reindex(categories, fill_value=0)
    )

    plt.figure(figsize=(12, 6))
    category_counts.sort_values().plot(kind='barh', color='hotpink', edgecolor='black')
    plt.title(f'Job Categories in {city}', fontsize=14)
    plt.xlabel('Number of Jobs')
    plt.ylabel('Category')
    plt.grid(axis='x', linestyle='--', alpha=0.7)
    plt.tight_layout()
    plt.show()

Plot charts for Jobs in Adelaide SA, Brisbane QLD, Canberra ACT, Melbourne VIC, Perth WA, Sydney NSW by Industry sector:

Python

# Upload the CSV file
from google.colab import files
uploaded = files.upload()

# Imports
import io
import re

import pandas as pd
import matplotlib.pyplot as plt

# Load the CSV
# files.upload() returns a {filename: file bytes} mapping. Read the file that
# was actually uploaded so a differently named (or renamed re-upload) file is
# not silently ignored; fall back to the expected name if nothing was uploaded.
if uploaded:
    uploaded_name = next(iter(uploaded))
    df = pd.read_csv(io.BytesIO(uploaded[uploaded_name]))
else:
    df = pd.read_csv('classified_jobs_updated.csv')

# Normalize column names
df.columns = [col.strip().lower() for col in df.columns]

# Use the cleaned final categories you provided
categories = [
    "Banking", "Insurance", "Financial Services", "Healthcare", "Medical",
    "Education", "Defence", "Federal Government", "State Government",
    "Retail", "Hospitality", "Food", "FMCG", "Tourism", "Aviation",
    "Consulting", "Technology", "Telco", "Local Government",
    "Mining", "Energy", "Engineering", "Construction", "Legal", "Real Estate",
    "Marketing", "Sales", "IT Support", "Data Science", "DevOps", "Cybersecurity",
    "Project Management", "HR", "Others"
]

# Try to find location column
# The first column whose (lower-cased) name contains "location" or "city" wins.
location_column = next(
    (col for col in df.columns if 'location' in col or 'city' in col),
    None,
)

if location_column is None:
    raise ValueError("Couldn't find a location or city column.")

# Clean and normalize full city names from location column
# City words, then whitespace and/or a comma, then a state/territory code.
# The city must start with a letter so "  NSW" on its own cannot produce an
# empty city name.
_CITY_STATE_RE = re.compile(
    r'\b([a-z][a-z ]*)[\s,]+(act|nsw|nt|qld|sa|tas|vic|wa)\b'
)


def extract_full_location(text):
    """Return ``"<City> <STATE>"`` parsed from a free-text location.

    The text is lower-cased and searched for a run of letters/spaces followed
    by an Australian state or territory abbreviation (ACT, NSW, NT, QLD, SA,
    TAS, VIC, WA). The separator may be whitespace and/or a comma, so both
    ``"Sydney NSW 2000"`` and ``"Sydney, NSW"`` give ``"Sydney NSW"``. The city
    is title-cased with runs of spaces collapsed; the state is upper-cased.

    Returns ``"Unknown"`` when no city/state pair is found, which includes
    missing values (their string form contains no state code).
    """
    match = _CITY_STATE_RE.search(str(text).lower())
    if match is None:
        return "Unknown"

    city = " ".join(match.group(1).split()).title()
    state = match.group(2).upper()
    return f"{city} {state}"
# Derive a "<City> <STATE>" label per row from the detected location column.
df['full_city'] = df[location_column].apply(extract_full_location)

# Define the 6 major cities you want to plot
target_cities = [
    "Adelaide SA", "Brisbane QLD", "Canberra ACT",
    "Melbourne VIC", "Perth WA", "Sydney NSW"
]

# Plot charts only for those cities using 'final_category'
for city in target_cities:
    city_df = df[df['full_city'] == city]
    if city_df.empty:
        # Nothing to draw for this city; report it and move on.
        print(f"⚠️ No data found for {city}")
        continue

    # Keep only the listed categories, in list order, with 0 for absent ones.
    category_counts = (
        city_df['final_category']
        .value_counts()
        .reindex(categories, fill_value=0)
    )

    plt.figure(figsize=(12, 6))
    category_counts.sort_values().plot(kind='barh', color='hotpink', edgecolor='black')
    plt.title(f'Job Categories in {city}', fontsize=14)
    plt.xlabel('Number of Jobs')
    plt.ylabel('Category')
    plt.grid(axis='x', linestyle='--', alpha=0.7)
    plt.tight_layout()
    plt.show()

Plot Stacked bar chart for Job category distribution by Australian cities:

Python

# Upload CSV file
from google.colab import files
uploaded = files.upload()

# Imports
import io
import re

import pandas as pd
import matplotlib.pyplot as plt

# Load the CSV
# files.upload() returns a {filename: file bytes} mapping. Read the file that
# was actually uploaded so a differently named (or renamed re-upload) file is
# not silently ignored; fall back to the expected name if nothing was uploaded.
if uploaded:
    uploaded_name = next(iter(uploaded))
    df = pd.read_csv(io.BytesIO(uploaded[uploaded_name]))
else:
    df = pd.read_csv('classified_jobs_updated.csv')

# Normalize column names
df.columns = [col.strip().lower() for col in df.columns]

# Use the cleaned final categories you provided
categories = [
    "Banking", "Insurance", "Financial Services", "Healthcare", "Medical",
    "Education", "Defence", "Federal Government", "State Government",
    "Retail", "Hospitality", "Food", "FMCG", "Tourism", "Aviation",
    "Consulting", "Technology", "Telco", "Local Government",
    "Mining", "Energy", "Engineering", "Construction", "Legal", "Real Estate",
    "Marketing", "Sales", "IT Support", "Data Science", "DevOps", "Cybersecurity",
    "Project Management", "HR", "Others"
]

# Find location column (city or location)
# The first column whose (lower-cased) name contains "location" or "city" wins.
location_column = next(
    (col for col in df.columns if 'location' in col or 'city' in col),
    None,
)

if location_column is None:
    raise ValueError("Couldn't find a location or city column.")

# Extract full city name + state abbreviation
# City words, then whitespace and/or a comma, then a state/territory code.
# The city must start with a letter so "  NSW" on its own cannot produce an
# empty city name.
_CITY_STATE_RE = re.compile(
    r'\b([a-z][a-z ]*)[\s,]+(act|nsw|nt|qld|sa|tas|vic|wa)\b'
)


def extract_full_location(text):
    """Return ``"<City> <STATE>"`` parsed from a free-text location.

    The text is lower-cased and searched for a run of letters/spaces followed
    by an Australian state or territory abbreviation (ACT, NSW, NT, QLD, SA,
    TAS, VIC, WA). The separator may be whitespace and/or a comma, so both
    ``"Sydney NSW 2000"`` and ``"Sydney, NSW"`` give ``"Sydney NSW"``. The city
    is title-cased with runs of spaces collapsed; the state is upper-cased.

    Returns ``"Unknown"`` when no city/state pair is found, which includes
    missing values (their string form contains no state code).
    """
    match = _CITY_STATE_RE.search(str(text).lower())
    if match is None:
        return "Unknown"

    city = " ".join(match.group(1).split()).title()
    state = match.group(2).upper()
    return f"{city} {state}"
# Derive a "<City> <STATE>" label per row from the detected location column.
df['full_city'] = df[location_column].apply(extract_full_location)

# Define the 6 target cities
target_cities = [
    "Adelaide SA", "Brisbane QLD", "Canberra ACT",
    "Melbourne VIC", "Perth WA", "Sydney NSW"
]

# Filter for those cities
filtered_df = df[df['full_city'].isin(target_cities)]

# Build city x category count table for stacked bar plot with final_category
# crosstab counts rows directly, so the result does not depend on another
# column being present and non-null (counting through 'classified_job' would
# skip rows where that column is empty).
pivot = pd.crosstab(filtered_df['full_city'], filtered_df['final_category'])

# Reindex to ensure consistent order
pivot = pivot.reindex(index=target_cities, columns=categories, fill_value=0)

# Define pink/violet color palette: one distinct color per category.
# A list shorter than the number of categories would make several categories
# share a color in the stacked bars and the legend.
color_palette = [
    "#FFC0CB", "#FFB6C1", "#FF69B4", "#FF1493", "#DB7093",
    "#C71585", "#D8BFD8", "#DDA0DD", "#DA70D6", "#BA55D3",
    "#9370DB", "#8A2BE2", "#9400D3", "#9932CC", "#8B008B",
    "#E6A9EC", "#DE5D83", "#D896FF", "#D291BC", "#B57EDC",
    "#9B59B6", "#8E44AD", "#7D3C98", "#6C3483", "#5B2C6F",
    "#4A235A", "#3C1E53", "#2E1450", "#201247", "#120A3F",
    "#F8BBD0", "#F48FB1", "#CE93D8", "#B39DDB",
]
if len(color_palette) < len(categories):
    raise ValueError(
        f"color_palette has {len(color_palette)} colors but "
        f"{len(categories)} categories need one each."
    )
color_palette = color_palette[:len(categories)]  # drop any surplus colors

# Plot stacked bar chart
pivot.plot(kind='bar', stacked=True, figsize=(20, 15), color=color_palette)

plt.title('Stacked Job Category Distribution by City', fontsize=16)
plt.xlabel('City')
plt.ylabel('Number of Jobs')
plt.xticks(rotation=45, ha='right')
plt.legend(title='Category', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.grid(axis='y')
plt.show()

Congratulations! You've Completed the Course

You've successfully completed all 42 chapters of this course.

You might also be interested in

Digital Twin III – “The Cyber-Hardened Portfolio”

Hack me if you can! Your portfolio is no longer just a showcase of your achievements — it is now an active target. Every professional Web application backed by a database of users is a potential entry point for attackers, and the developers who succeed are those who can defend, monitor, and continuously harden their systems. Digital Twin III challenges you to transform your personal portfolio into a cyber-secured, intelligence-driven digital asset — one that not only looks impressive, but proves its resilience under real-world conditions. This is where your skills move beyond basic deployment. You will implement a secure content management system, protect private user data, integrate defensive controls like WAF and firewalls, and design visible countermeasures against threats such as: * SQL injection * Prompt injection * Authentication/authorization failures * Broken access control * Malicious payloads * Automated bot attacks Your portfolio becomes a live cyber lab — built to be tested, attacked, and improved through real telemetry. You will upload evidence of each security layer: logs, attack statistics, CVSS scoring, risk reports, penetration test results, remediation notes, and resilience patterns. Your Digital Twin doesn’t claim to be secure — it demonstrates it. By the end of this course, your public website will: * Host your professional identity & project content * Detect and block real cyber threats in real-time * Analyse attacker behaviours * Communicate your cyber maturity to employers * Show your ability to manage security as a lifecycle — not a checkbox This is your opportunity to build something professionally defensible — a deployable, auditable case study that proves you understand the realities of modern cyber security. Welcome to Digital Twin III — the version of you that cannot be exploited.

Digital Twin II (Web and Voice Agent)

Digital Twin II is a hands-on, full-stack AI engineering project focused on turning you into a web-accessible and voice-accessible AI persona. Unlike Digital Twin I, which was centred heavily on retrieval-augmented intelligence (RAG), this course emphasises interaction, identity, and experience design. The goal is to build a fully functional chat- or voice-enabled Digital Twin that lives on the web and can autonomously communicate with visitors — particularly recruiters, hiring managers, and potential collaborators — while reflecting your personality, skills, and professional brand. You will build a production-style application that: • Has a real frontend and user experience • Stores and tracks conversations and leads • Handles scheduling and CTAs (calls to action) • Optionally supports phone calls and voice-driven interactions This course is specifically designed for developers who already possess: ✔ Modern web development knowledge (React, Next.js, TypeScript) ✔ Experience with CRUD, authentication, and full-stack workflows ✔ Understanding of spec-driven development and GitHub workflows ✔ Familiarity with agentic coding tools (Copilot, Claude Opus 4.5+) If Digital Twin I defined the intelligence, Digital Twin II defines the presence.

Digital Twin I (RAG Solution)

This course centres on a live industry project where you design and deploy a "Digital Twin"—a personal AI agent capable of autonomously representing its creator in professional job interviews. By leveraging Retrieval-Augmented Generation (RAG) and the Model Context Protocol (MCP), you will build a system that can semantically search its own professional history to provide factual, context-aware answers to recruiters and hiring managers. You will move from theory to application by mastering the following technical domains: • RAG Architecture: Implementing semantic search using vector databases to ground AI responses in factual studies and professional experiences. • MCP Server Development: Building Model Context Protocol servers (using Next.js/TypeScript) to integrate local data with AI agents. • Data Pipeline Engineering: Annotating, enriching, and embedding professional profiles (JSON) into vector storage. • AI-Powered Workflow: Utilising VS Code Insiders and GitHub Copilot to drive development and simulate agentic behaviours. • Team Collaboration: Managing a software lifecycle using GitHub for version control (Pull Requests, branches) and ClickUp for project management.

# Step 1: Upload your file
from google.colab import files
uploaded = files.upload()

# Step 2: Imports
import io

import pandas as pd
import matplotlib.pyplot as plt

# Step 3: Load CSV
# files.upload() returns a {filename: file bytes} mapping. Read the file that
# was actually uploaded so a differently named (or renamed re-upload) file is
# not silently ignored; fall back to the expected name if nothing was uploaded.
if uploaded:
    uploaded_name = next(iter(uploaded))
    df = pd.read_csv(io.BytesIO(uploaded[uploaded_name]))
else:
    df = pd.read_csv('classified_jobs_updated.csv')

# Step 4: Normalize columns
df.columns = [col.strip().lower() for col in df.columns]

# Step 5: Filter for Sydney jobs (case-insensitive search)
# Search the location/city column when one exists: scanning every cell of
# every row is slow and also matches rows that merely mention Sydney somewhere
# else (e.g. in a description).
location_column = next(
    (col for col in df.columns if 'location' in col or 'city' in col),
    None,
)
if location_column is not None:
    is_sydney = df[location_column].astype(str).str.contains(
        'sydney', case=False, regex=False
    )
else:
    # No recognizable location column: fall back to searching every column.
    is_sydney = df.astype(str).apply(
        lambda column: column.str.contains('sydney', case=False, regex=False)
    ).any(axis=1)
sydney_df = df[is_sydney].copy()

# Step 6: Use cleaned final categories
categories = [
    "Banking", "Insurance", "Financial Services", "Healthcare", "Medical",
    "Education", "Defence", "Federal Government", "State Government",
    "Retail", "Hospitality", "Food", "FMCG", "Tourism", "Aviation",
    "Consulting", "Technology", "Telco", "Local Government",
    "Mining", "Energy", "Engineering", "Construction", "Legal", "Real Estate",
    "Marketing", "Sales", "IT Support", "Data Science", "DevOps", "Cybersecurity",
    "Project Management", "HR", "Others"
]

# Step 7: Count final_category values in Sydney
# reindex() keeps only the listed categories (in list order) and reports 0 for
# categories with no Sydney rows; labels outside the list are not charted.
category_counts = (
    sydney_df['final_category']
    .value_counts()
    .reindex(categories, fill_value=0)
)

# Step 8: Plotting (Horizontal Bar Chart)
plt.figure(figsize=(12, 10))
category_counts.sort_values().plot(kind='barh', color='hotpink', edgecolor='black')

plt.title('Number of Jobs in Sydney by Industry Sector', fontsize=16)
plt.xlabel('Number of Jobs')
plt.ylabel('Industry Sector')
plt.grid(axis='x', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()

# Step 1: Upload updated file
from google.colab import files
uploaded = files.upload()

# Step 2: Imports
import io
import re

import pandas as pd
import matplotlib.pyplot as plt

# Step 3: Load the cleaned dataset
# files.upload() returns a {filename: file bytes} mapping. Read the file that
# was actually uploaded so a differently named (or renamed re-upload) file is
# not silently ignored; fall back to the expected name if nothing was uploaded.
if uploaded:
    uploaded_name = next(iter(uploaded))
    df = pd.read_csv(io.BytesIO(uploaded[uploaded_name]))
else:
    df = pd.read_csv('classified_jobs_updated.csv')

# Step 4: Normalize column names
df.columns = [col.strip().lower() for col in df.columns]

# Step 5: Use the updated category list
categories = [
    "Banking", "Insurance", "Financial Services", "Healthcare", "Medical",
    "Education", "Defence", "Federal Government", "State Government",
    "Retail", "Hospitality", "Food", "FMCG", "Tourism", "Aviation",
    "Consulting", "Technology", "Telco", "Local Government",
    "Mining", "Energy", "Engineering", "Construction", "Legal", "Real Estate",
    "Marketing", "Sales", "IT Support", "Data Science", "DevOps", "Cybersecurity",
    "Project Management", "HR", "Others"
]

# Step 6: Detect the location column
# The first column whose (lower-cased) name contains "location" or "city" wins.
location_column = next(
    (col for col in df.columns if 'location' in col or 'city' in col),
    None,
)

if location_column is None:
    raise ValueError("Couldn't find a location or city column.")


# Step 7: Extract normalized city name
def extract_city(text):
    """Return the leading run of letters and spaces in *text*, title-cased.

    Anything after the first digit or punctuation mark is dropped. Returns
    "Unknown" when *text* is missing (None or NaN) or contains no letters.
    """
    # NaN is the only float that is not equal to itself; without this guard a
    # missing location would be reported as a city named "Nan" (or "None").
    if text is None or (isinstance(text, float) and text != text):
        return "Unknown"

    match = re.search(r'\b([a-z ]+)', str(text).lower())
    # The pattern can match whitespace alone (e.g. "123 "); treat that as no city.
    city = match.group(1).strip() if match else ""
    return city.title() if city else "Unknown"


df['city'] = df[location_column].apply(extract_city)

# Step 8: Plot horizontal bar chart per city
# value_counts() orders the labels from most to fewest rows, so the charts
# are produced in that order.
cities = df['city'].value_counts().index.tolist()

for city in cities:
    city_df = df[df['city'] == city]
    # Keep only the listed categories, in list order, with 0 for absent ones.
    category_counts = (
        city_df['final_category']
        .value_counts()
        .reindex(categories, fill_value=0)
    )

    plt.figure(figsize=(12, 6))
    category_counts.sort_values().plot(kind='barh', color='hotpink', edgecolor='black')
    plt.title(f'Job Categories in {city}', fontsize=14)
    plt.xlabel('Number of Jobs')
    plt.ylabel('Category')
    plt.grid(axis='x', linestyle='--', alpha=0.7)
    plt.tight_layout()
    plt.show()

# Upload the CSV file
from google.colab import files
uploaded = files.upload()

# Imports
import io
import re

import pandas as pd
import matplotlib.pyplot as plt

# Load the CSV
# files.upload() returns a {filename: file bytes} mapping. Read the file that
# was actually uploaded so a differently named (or renamed re-upload) file is
# not silently ignored; fall back to the expected name if nothing was uploaded.
if uploaded:
    uploaded_name = next(iter(uploaded))
    df = pd.read_csv(io.BytesIO(uploaded[uploaded_name]))
else:
    df = pd.read_csv('classified_jobs_updated.csv')

# Normalize column names
df.columns = [col.strip().lower() for col in df.columns]

# Use the cleaned final categories you provided
categories = [
    "Banking", "Insurance", "Financial Services", "Healthcare", "Medical",
    "Education", "Defence", "Federal Government", "State Government",
    "Retail", "Hospitality", "Food", "FMCG", "Tourism", "Aviation",
    "Consulting", "Technology", "Telco", "Local Government",
    "Mining", "Energy", "Engineering", "Construction", "Legal", "Real Estate",
    "Marketing", "Sales", "IT Support", "Data Science", "DevOps", "Cybersecurity",
    "Project Management", "HR", "Others"
]

# Try to find location column
# The first column whose (lower-cased) name contains "location" or "city" wins.
location_column = next(
    (col for col in df.columns if 'location' in col or 'city' in col),
    None,
)

if location_column is None:
    raise ValueError("Couldn't find a location or city column.")

# Clean and normalize full city names from location column
# City words, then whitespace and/or a comma, then a state/territory code.
# The city must start with a letter so "  NSW" on its own cannot produce an
# empty city name.
_CITY_STATE_RE = re.compile(
    r'\b([a-z][a-z ]*)[\s,]+(act|nsw|nt|qld|sa|tas|vic|wa)\b'
)


def extract_full_location(text):
    """Return "<City> <STATE>" parsed from a free-text location.

    Accepts whitespace and/or a comma between city and state, title-cases the
    city (collapsing repeated spaces) and upper-cases the state. Returns
    "Unknown" when no city/state pair is found.
    """
    match = _CITY_STATE_RE.search(str(text).lower())
    if match is None:
        return "Unknown"

    city = " ".join(match.group(1).split()).title()
    state = match.group(2).upper()
    return f"{city} {state}"


df['full_city'] = df[location_column].apply(extract_full_location)

# Define the 6 major cities you want to plot
target_cities = [
    "Adelaide SA", "Brisbane QLD", "Canberra ACT",
    "Melbourne VIC", "Perth WA", "Sydney NSW"
]

# Plot charts only for those cities using 'final_category'
for city in target_cities:
    city_df = df[df['full_city'] == city]
    if city_df.empty:
        # Nothing to draw for this city; report it and move on.
        print(f"⚠️ No data found for {city}")
        continue

    # Keep only the listed categories, in list order, with 0 for absent ones.
    category_counts = (
        city_df['final_category']
        .value_counts()
        .reindex(categories, fill_value=0)
    )

    plt.figure(figsize=(12, 6))
    category_counts.sort_values().plot(kind='barh', color='hotpink', edgecolor='black')
    plt.title(f'Job Categories in {city}', fontsize=14)
    plt.xlabel('Number of Jobs')
    plt.ylabel('Category')
    plt.grid(axis='x', linestyle='--', alpha=0.7)
    plt.tight_layout()
    plt.show()

# Upload CSV file
from google.colab import files
uploaded = files.upload()

# Imports
import io
import re

import pandas as pd
import matplotlib.pyplot as plt

# Load the CSV
# files.upload() returns a {filename: file bytes} mapping. Read the file that
# was actually uploaded so a differently named (or renamed re-upload) file is
# not silently ignored; fall back to the expected name if nothing was uploaded.
if uploaded:
    uploaded_name = next(iter(uploaded))
    df = pd.read_csv(io.BytesIO(uploaded[uploaded_name]))
else:
    df = pd.read_csv('classified_jobs_updated.csv')

# Normalize column names
df.columns = [col.strip().lower() for col in df.columns]

# Use the cleaned final categories you provided
categories = [
    "Banking", "Insurance", "Financial Services", "Healthcare", "Medical",
    "Education", "Defence", "Federal Government", "State Government",
    "Retail", "Hospitality", "Food", "FMCG", "Tourism", "Aviation",
    "Consulting", "Technology", "Telco", "Local Government",
    "Mining", "Energy", "Engineering", "Construction", "Legal", "Real Estate",
    "Marketing", "Sales", "IT Support", "Data Science", "DevOps", "Cybersecurity",
    "Project Management", "HR", "Others"
]

# Find location column (city or location)
# The first column whose (lower-cased) name contains "location" or "city" wins.
location_column = next(
    (col for col in df.columns if 'location' in col or 'city' in col),
    None,
)

if location_column is None:
    raise ValueError("Couldn't find a location or city column.")

# Extract full city name + state abbreviation
# City words, then whitespace and/or a comma, then a state/territory code.
# The city must start with a letter so "  NSW" on its own cannot produce an
# empty city name.
_CITY_STATE_RE = re.compile(
    r'\b([a-z][a-z ]*)[\s,]+(act|nsw|nt|qld|sa|tas|vic|wa)\b'
)


def extract_full_location(text):
    """Return "<City> <STATE>" parsed from a free-text location.

    Accepts whitespace and/or a comma between city and state, title-cases the
    city (collapsing repeated spaces) and upper-cases the state. Returns
    "Unknown" when no city/state pair is found.
    """
    match = _CITY_STATE_RE.search(str(text).lower())
    if match is None:
        return "Unknown"

    city = " ".join(match.group(1).split()).title()
    state = match.group(2).upper()
    return f"{city} {state}"


df['full_city'] = df[location_column].apply(extract_full_location)

# Define the 6 target cities
target_cities = [
    "Adelaide SA", "Brisbane QLD", "Canberra ACT",
    "Melbourne VIC", "Perth WA", "Sydney NSW"
]

# Filter for those cities
filtered_df = df[df['full_city'].isin(target_cities)]

# Build city x category count table for stacked bar plot with final_category
# crosstab counts rows directly, so the result does not depend on another
# column being present and non-null (counting through 'classified_job' would
# skip rows where that column is empty).
pivot = pd.crosstab(filtered_df['full_city'], filtered_df['final_category'])

# Reindex to ensure consistent order
pivot = pivot.reindex(index=target_cities, columns=categories, fill_value=0)

# Define pink/violet color palette: one distinct color per category.
# A list shorter than the number of categories would make several categories
# share a color in the stacked bars and the legend.
color_palette = [
    "#FFC0CB", "#FFB6C1", "#FF69B4", "#FF1493", "#DB7093",
    "#C71585", "#D8BFD8", "#DDA0DD", "#DA70D6", "#BA55D3",
    "#9370DB", "#8A2BE2", "#9400D3", "#9932CC", "#8B008B",
    "#E6A9EC", "#DE5D83", "#D896FF", "#D291BC", "#B57EDC",
    "#9B59B6", "#8E44AD", "#7D3C98", "#6C3483", "#5B2C6F",
    "#4A235A", "#3C1E53", "#2E1450", "#201247", "#120A3F",
    "#F8BBD0", "#F48FB1", "#CE93D8", "#B39DDB",
]
if len(color_palette) < len(categories):
    raise ValueError(
        f"color_palette has {len(color_palette)} colors but "
        f"{len(categories)} categories need one each."
    )
color_palette = color_palette[:len(categories)]  # drop any surplus colors

# Plot stacked bar chart
pivot.plot(kind='bar', stacked=True, figsize=(20, 15), color=color_palette)

plt.title('Stacked Job Category Distribution by City', fontsize=16)
plt.xlabel('City')
plt.ylabel('Number of Jobs')
plt.xticks(rotation=45, ha='right')
plt.legend(title='Category', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.grid(axis='y')
plt.show()