# Plot chart: ranking of the top programming languages for data jobs in Sydney
import io

import matplotlib.pyplot as plt
import pandas as pd
from google.colab import files

# File name the dataset is expected to have; also the on-disk fallback path.
DEFAULT_CSV = "language_extraction_checkpoint.csv"

# ---- Upload CSV file ---- #
uploaded = files.upload()

# ---- Load dataset ---- #
# Read the bytes handed back by the upload widget instead of a fixed path:
# the upload may carry a different name than DEFAULT_CSV, and reading a fixed
# path can silently pick up an older copy already present in the working
# directory. If nothing was uploaded, fall back to the file on disk.
if DEFAULT_CSV in uploaded:
    csv_source = io.BytesIO(uploaded[DEFAULT_CSV])
elif uploaded:
    csv_source = io.BytesIO(next(iter(uploaded.values())))
else:
    csv_source = DEFAULT_CSV
df = pd.read_csv(csv_source)

# Define column names
city_col = 'location'
lang_col = 'extracted_programming_languages_str'  # Adjust if needed


def _split_languages(raw):
    """Return the distinct, non-empty language names in a comma-separated string.

    Names are stripped of surrounding whitespace. Empty tokens (from stray or
    trailing commas) are dropped, and a name repeated within the same string
    is kept once, so each job listing counts a language at most one time.
    Non-string input yields an empty list.
    """
    if not isinstance(raw, str):
        return []
    names = (part.strip() for part in raw.split(','))
    return list(dict.fromkeys(name for name in names if name))


# Drop rows with missing data
df = df.dropna(subset=[city_col, lang_col]).copy()

# Normalize and split language strings
df[lang_col] = df[lang_col].str.lower()
df['language_list'] = df[lang_col].apply(_split_languages)

# Explode list to rows; listings whose list is empty explode to NaN and are
# removed so they never show up as a "language".
df_exploded = df.explode('language_list').dropna(subset=['language_list'])

# Filter for Sydney rows (plain substring match; non-string locations are
# treated as "no match" rather than producing an invalid boolean mask).
is_sydney = df_exploded[city_col].str.lower().str.contains(
    'sydney', regex=False, na=False
)
df_sydney = df_exploded[is_sydney]

# Count frequency of each language
top_n = 10
language_counts = df_sydney['language_list'].value_counts().head(top_n)

# Plot horizontal bar chart with pink shading
if language_counts.empty:
    print('No programming languages found for Sydney listings; nothing to plot.')
else:
    fig, ax = plt.subplots(figsize=(12, 8))
    # Ascending sort puts the most frequent language at the top of a barh chart.
    language_counts.sort_values().plot(
        kind='barh', color='#ff66b2', edgecolor='black', ax=ax
    )

    ax.set_title(
        f'Top {top_n} Programming Languages Used in Sydney Data Jobs',
        fontsize=16,
    )
    ax.set_xlabel('Number of Jobs')
    ax.set_ylabel('Programming Language')
    ax.grid(axis='x', linestyle='--', alpha=0.7)
    fig.tight_layout()
    plt.show()

# Plot chart: ranking of the top programming languages for data jobs across all Australian cities
# Top Programming Languages Across All Australian Cities

import io

import matplotlib.pyplot as plt
import pandas as pd
from google.colab import files

# File name the dataset is expected to have; also the on-disk fallback path.
DEFAULT_CSV = "language_extraction_checkpoint.csv"

# ---- Upload CSV file ---- #
uploaded = files.upload()

# ---- Load dataset ---- #
# Read the bytes handed back by the upload widget instead of a fixed path:
# the upload may carry a different name than DEFAULT_CSV, and reading a fixed
# path can silently pick up an older copy already present in the working
# directory. If nothing was uploaded, fall back to the file on disk.
if DEFAULT_CSV in uploaded:
    csv_source = io.BytesIO(uploaded[DEFAULT_CSV])
elif uploaded:
    csv_source = io.BytesIO(next(iter(uploaded.values())))
else:
    csv_source = DEFAULT_CSV
df = pd.read_csv(csv_source)

# Define column names (adjust if needed)
city_col = 'location'
lang_col = 'extracted_programming_languages_str'


def _split_languages(raw):
    """Return the distinct, non-empty language names in a comma-separated string.

    Names are stripped of surrounding whitespace. Empty tokens (from stray or
    trailing commas) are dropped, and a name repeated within the same string
    is kept once, so each job listing counts a language at most one time.
    Non-string input yields an empty list.
    """
    if not isinstance(raw, str):
        return []
    names = (part.strip() for part in raw.split(','))
    return list(dict.fromkeys(name for name in names if name))


# Drop rows with missing data
df = df.dropna(subset=[city_col, lang_col]).copy()

# Normalize text and split language strings
df[lang_col] = df[lang_col].str.lower()
df['language_list'] = df[lang_col].apply(_split_languages)

# Explode the list to individual rows; listings whose list is empty explode
# to NaN and are removed so they never show up as a "language".
df_exploded = df.explode('language_list').dropna(subset=['language_list'])

# Filter for rows where the location is in Australia
australian_cities_keywords = [
    'sydney', 'melbourne', 'brisbane', 'perth', 'adelaide', 'canberra',
    'hobart', 'darwin', 'australia',
]
# The keywords are joined into one regex alternation; non-string locations
# are treated as "no match" rather than producing an invalid boolean mask.
is_australian = df_exploded[city_col].str.lower().str.contains(
    '|'.join(australian_cities_keywords), na=False
)
df_aus = df_exploded[is_australian]

# Count frequency of each language
top_n = 10
language_counts_aus = df_aus['language_list'].value_counts().head(top_n)

# Plot horizontal bar chart with pink shade and no labels
if language_counts_aus.empty:
    print('No programming languages found for Australian listings; nothing to plot.')
else:
    fig, ax = plt.subplots(figsize=(12, 8))
    # Ascending sort puts the most frequent language at the top of a barh chart.
    language_counts_aus.sort_values().plot(
        kind='barh',
        color='#ff66b2',
        edgecolor='black',
        ax=ax,
    )

    # Title and axis labels
    ax.set_title(
        f'Top {top_n} Programming Languages in Data Jobs Across Australian Cities',
        fontsize=16,
    )
    ax.set_xlabel('Number of Job Listings')
    ax.set_ylabel('Programming Language')
    ax.grid(axis='x', linestyle='--', alpha=0.7)
    fig.tight_layout()
    plt.show()