# Plot chart for Rank of top Soft Skills for Data Jobs in Sydney:
import io
import re

import pandas as pd


def load_uploaded_csv(uploaded, default_name: str) -> pd.DataFrame:
    """Load the dataset from a Colab upload, falling back to a file on disk.

    Args:
        uploaded: Mapping of file name -> raw file bytes, as returned by
            ``google.colab.files.upload()``. May be empty.
        default_name: Expected CSV file name. It is preferred when present in
            ``uploaded`` and is read from the working directory when nothing
            usable was uploaded.

    Returns:
        The parsed CSV as a DataFrame.
    """
    if default_name in uploaded:
        return pd.read_csv(io.BytesIO(uploaded[default_name]))
    # The upload may arrive under a different name (for example a renamed
    # copy), so read the returned bytes instead of relying on the disk name.
    csv_names = [name for name in uploaded if name.lower().endswith(".csv")]
    if csv_names:
        return pd.read_csv(io.BytesIO(uploaded[csv_names[0]]))
    return pd.read_csv(default_name)


def top_soft_skills(
    df: pd.DataFrame,
    location_keywords,
    city_col: str = "location",
    soft_skill_col: str = "extracted_soft_skills_str",
    top_n: int = 10,
) -> pd.Series:
    """Count in how many listings each soft skill appears for given locations.

    Args:
        df: Job listings, one row per listing.
        location_keywords: A keyword or list of keywords; a listing is kept
            when its location contains any of them (case-insensitive,
            matched literally).
        city_col: Name of the location column.
        soft_skill_col: Name of the column holding comma-separated skills.
        top_n: Maximum number of skills to return.

    Returns:
        A Series indexed by lower-cased skill name whose values are listing
        counts, most frequent first, limited to ``top_n`` entries. Empty when
        nothing matches.
    """
    if isinstance(location_keywords, str):
        location_keywords = [location_keywords]

    # A fresh 0..n-1 index gives every listing a unique id for the
    # per-listing de-duplication below.
    rows = df.dropna(subset=[city_col, soft_skill_col]).reset_index(drop=True)

    # Escape keywords so they are matched literally, not as regex syntax.
    pattern = "|".join(re.escape(str(k).lower()) for k in location_keywords)
    in_location = rows[city_col].astype(str).str.lower().str.contains(pattern, regex=True)

    skills = (
        rows.loc[in_location, soft_skill_col]
        .astype(str)
        .str.lower()
        .str.split(",")
        .explode()
        .str.strip()
    )
    # Trailing or doubled commas produce blank tokens; they are not skills.
    skills = skills[skills != ""]
    # Count each skill once per listing so the totals are listing counts.
    skills = skills.reset_index().drop_duplicates()[soft_skill_col]
    return skills.value_counts().head(top_n)


# Helpers above only need pandas; plotting and Colab imports live under the
# guard so the module can be imported outside Colab. In a notebook cell
# __name__ is "__main__", so the script below still runs.
if __name__ == "__main__":
    import matplotlib.pyplot as plt
    from google.colab import files

    # ---- Upload CSV file ---- #
    uploaded = files.upload()

    # ---- Load dataset ---- #
    df = load_uploaded_csv(uploaded, "skills_extraction_checkpoint.csv")

    # Define column names
    city_col = 'location'
    soft_skill_col = 'extracted_soft_skills_str'  # Change if needed

    # Count top soft skills for Sydney entries.
    # NOTE(review): no job-category filter is applied; presumably the CSV
    # already contains only data jobs - confirm.
    top_n = 10
    soft_skill_counts = top_soft_skills(df, ['sydney'], city_col, soft_skill_col, top_n)

    if soft_skill_counts.empty:
        # Plotting an empty Series fails, so report and skip instead.
        print('No soft skills found for Sydney listings; nothing to plot.')
    else:
        # Plot horizontal bar chart in pink shade (without value labels);
        # ascending sort puts the most frequent skill at the top.
        plt.figure(figsize=(12, 8))
        soft_skill_counts.sort_values().plot(kind='barh', color='#ff66b2', edgecolor='black')

        # Formatting
        plt.title(f'Top {top_n} Soft Skills in Sydney Data Jobs', fontsize=16)
        plt.xlabel('Number of Job Listings')
        plt.ylabel('Soft Skill')
        plt.grid(axis='x', linestyle='--', alpha=0.7)
        plt.tight_layout()
        plt.show()

# Plot chart for Rank of top Soft Skills for Data Jobs in all cities in Australia:
# Exercise 10 - Top Soft Skills for Data Jobs Across Australian Cities (No Value Labels, Pink Chart)
import io
import re

import pandas as pd


def load_uploaded_csv(uploaded, default_name: str) -> pd.DataFrame:
    """Load the dataset from a Colab upload, falling back to a file on disk.

    Args:
        uploaded: Mapping of file name -> raw file bytes, as returned by
            ``google.colab.files.upload()``. May be empty.
        default_name: Expected CSV file name. It is preferred when present in
            ``uploaded`` and is read from the working directory when nothing
            usable was uploaded.

    Returns:
        The parsed CSV as a DataFrame.
    """
    if default_name in uploaded:
        return pd.read_csv(io.BytesIO(uploaded[default_name]))
    # The upload may arrive under a different name (for example a renamed
    # copy), so read the returned bytes instead of relying on the disk name.
    csv_names = [name for name in uploaded if name.lower().endswith(".csv")]
    if csv_names:
        return pd.read_csv(io.BytesIO(uploaded[csv_names[0]]))
    return pd.read_csv(default_name)


def top_soft_skills(
    df: pd.DataFrame,
    location_keywords,
    city_col: str = "location",
    soft_skill_col: str = "extracted_soft_skills_str",
    top_n: int = 10,
) -> pd.Series:
    """Count in how many listings each soft skill appears for given locations.

    Args:
        df: Job listings, one row per listing.
        location_keywords: A keyword or list of keywords; a listing is kept
            when its location contains any of them (case-insensitive,
            matched literally).
        city_col: Name of the location column.
        soft_skill_col: Name of the column holding comma-separated skills.
        top_n: Maximum number of skills to return.

    Returns:
        A Series indexed by lower-cased skill name whose values are listing
        counts, most frequent first, limited to ``top_n`` entries. Empty when
        nothing matches.
    """
    if isinstance(location_keywords, str):
        location_keywords = [location_keywords]

    # A fresh 0..n-1 index gives every listing a unique id for the
    # per-listing de-duplication below.
    rows = df.dropna(subset=[city_col, soft_skill_col]).reset_index(drop=True)

    # Escape keywords so they are matched literally, not as regex syntax.
    pattern = "|".join(re.escape(str(k).lower()) for k in location_keywords)
    in_location = rows[city_col].astype(str).str.lower().str.contains(pattern, regex=True)

    skills = (
        rows.loc[in_location, soft_skill_col]
        .astype(str)
        .str.lower()
        .str.split(",")
        .explode()
        .str.strip()
    )
    # Trailing or doubled commas produce blank tokens; they are not skills.
    skills = skills[skills != ""]
    # Count each skill once per listing so the totals are listing counts.
    skills = skills.reset_index().drop_duplicates()[soft_skill_col]
    return skills.value_counts().head(top_n)


# Helpers above only need pandas; plotting and Colab imports live under the
# guard so the module can be imported outside Colab. In a notebook cell
# __name__ is "__main__", so the script below still runs.
if __name__ == "__main__":
    import matplotlib.pyplot as plt
    from google.colab import files

    # ---- Upload CSV file ---- #
    uploaded = files.upload()

    # ---- Load dataset ---- #
    df = load_uploaded_csv(uploaded, "skills_extraction_checkpoint.csv")

    # Define column names
    city_col = 'location'
    soft_skill_col = 'extracted_soft_skills_str'  # Change if needed

    # Keep rows whose location mentions an Australian city or the country.
    australian_cities_keywords = [
        'sydney', 'melbourne', 'brisbane', 'perth', 'adelaide', 'canberra',
        'hobart', 'darwin', 'australia',
    ]

    # Count top soft skills
    top_n = 10
    soft_skill_counts_aus = top_soft_skills(
        df, australian_cities_keywords, city_col, soft_skill_col, top_n
    )

    if soft_skill_counts_aus.empty:
        # Plotting an empty Series fails, so report and skip instead.
        print('No soft skills found for Australian listings; nothing to plot.')
    else:
        # Plot horizontal bar chart in pink shade (without value labels);
        # ascending sort puts the most frequent skill at the top.
        plt.figure(figsize=(12, 8))
        soft_skill_counts_aus.sort_values().plot(kind='barh', color='#ff66b2', edgecolor='black')

        # Formatting
        plt.title(f'Top {top_n} Soft Skills in Data Jobs Across Australian Cities', fontsize=16)
        plt.xlabel('Number of Job Listings')
        plt.ylabel('Soft Skill')
        plt.grid(axis='x', linestyle='--', alpha=0.7)
        plt.tight_layout()
        plt.show()

# Plot General chart for soft skills across all jobs in Sydney:
# General chart for soft skills across all jobs in Sydney
import io
import re

import pandas as pd


def load_uploaded_csv(uploaded, default_name: str) -> pd.DataFrame:
    """Load the dataset from a Colab upload, falling back to a file on disk.

    Args:
        uploaded: Mapping of file name -> raw file bytes, as returned by
            ``google.colab.files.upload()``. May be empty.
        default_name: Expected CSV file name. It is preferred when present in
            ``uploaded`` and is read from the working directory when nothing
            usable was uploaded.

    Returns:
        The parsed CSV as a DataFrame.
    """
    if default_name in uploaded:
        return pd.read_csv(io.BytesIO(uploaded[default_name]))
    # The upload may arrive under a different name (for example a renamed
    # copy), so read the returned bytes instead of relying on the disk name.
    csv_names = [name for name in uploaded if name.lower().endswith(".csv")]
    if csv_names:
        return pd.read_csv(io.BytesIO(uploaded[csv_names[0]]))
    return pd.read_csv(default_name)


def top_soft_skills(
    df: pd.DataFrame,
    location_keywords,
    city_col: str = "location",
    soft_skill_col: str = "extracted_soft_skills_str",
    top_n: int = 10,
) -> pd.Series:
    """Count in how many listings each soft skill appears for given locations.

    Args:
        df: Job listings, one row per listing.
        location_keywords: A keyword or list of keywords; a listing is kept
            when its location contains any of them (case-insensitive,
            matched literally).
        city_col: Name of the location column.
        soft_skill_col: Name of the column holding comma-separated skills.
        top_n: Maximum number of skills to return.

    Returns:
        A Series indexed by lower-cased skill name whose values are listing
        counts, most frequent first, limited to ``top_n`` entries. Empty when
        nothing matches.
    """
    if isinstance(location_keywords, str):
        location_keywords = [location_keywords]

    # A fresh 0..n-1 index gives every listing a unique id for the
    # per-listing de-duplication below.
    rows = df.dropna(subset=[city_col, soft_skill_col]).reset_index(drop=True)

    # Escape keywords so they are matched literally, not as regex syntax.
    pattern = "|".join(re.escape(str(k).lower()) for k in location_keywords)
    in_location = rows[city_col].astype(str).str.lower().str.contains(pattern, regex=True)

    skills = (
        rows.loc[in_location, soft_skill_col]
        .astype(str)
        .str.lower()
        .str.split(",")
        .explode()
        .str.strip()
    )
    # Trailing or doubled commas produce blank tokens; they are not skills.
    skills = skills[skills != ""]
    # Count each skill once per listing so the totals are listing counts.
    skills = skills.reset_index().drop_duplicates()[soft_skill_col]
    return skills.value_counts().head(top_n)


# Helpers above only need pandas; plotting and Colab imports live under the
# guard so the module can be imported outside Colab. In a notebook cell
# __name__ is "__main__", so the script below still runs.
if __name__ == "__main__":
    import matplotlib.pyplot as plt
    from google.colab import files

    # ---- Upload CSV file ---- #
    uploaded = files.upload()

    # ---- Load dataset ---- #
    df = load_uploaded_csv(uploaded, "skills_extraction_checkpoint.csv")

    # Define column names
    city_col = 'location'
    soft_skill_col = 'extracted_soft_skills_str'  # Change if needed

    # Count top soft skills for Sydney entries (all jobs, not data jobs).
    # NOTE(review): only the Sydney location filter is applied; whether the
    # CSV really covers all job types cannot be told from here - confirm.
    top_n = 10
    soft_skill_counts_sydney = top_soft_skills(df, ['sydney'], city_col, soft_skill_col, top_n)

    if soft_skill_counts_sydney.empty:
        # Plotting an empty Series fails, so report and skip instead.
        print('No soft skills found for Sydney listings; nothing to plot.')
    else:
        # Plot horizontal bar chart in pink shade (without value labels);
        # ascending sort puts the most frequent skill at the top.
        plt.figure(figsize=(12, 8))
        soft_skill_counts_sydney.sort_values().plot(kind='barh', color='#ff66b2', edgecolor='black')

        # Formatting
        plt.title(f'Top {top_n} Soft Skills in All Jobs in Sydney', fontsize=16)
        plt.xlabel('Number of Job Listings')
        plt.ylabel('Soft Skill')
        plt.grid(axis='x', linestyle='--', alpha=0.7)
        plt.tight_layout()
        plt.show()