Plot a chart ranking the top tools for data jobs in Sydney:
import pandas as pd


def split_tools(tools_str):
    """Split a comma-separated tool string into a clean list of tool names.

    Each name is lower-cased and stripped of surrounding whitespace. Empty
    entries (from stray or trailing commas) are dropped, and a tool that is
    repeated within the same string is kept only once, so that every job
    posting contributes at most one count per tool.

    Args:
        tools_str: Comma-separated tool names, e.g. "Excel, Tableau".
            Anything that is not a string yields an empty list.

    Returns:
        The unique tool names, in first-seen order.
    """
    if not isinstance(tools_str, str):
        return []
    names = (raw.strip().lower() for raw in tools_str.split(','))
    # dict.fromkeys de-duplicates while preserving insertion order.
    return list(dict.fromkeys(name for name in names if name))


# The script section is guarded (but kept at module level) so that running the
# cell still defines df, df_exploded, df_sydney, tool_counts, ... as globals,
# while split_tools() can be imported without a Colab runtime.
if __name__ == "__main__":
    import matplotlib.pyplot as plt
    from google.colab import files

    # Upload the CSV file
    uploaded = files.upload()

    # Load dataset (the uploaded file is expected to carry this exact name)
    df = pd.read_csv("extraction_checkpoint.csv")

    # Column names (adjust if needed)
    city_col = 'location'
    tools_col = 'extracted_tools_str'

    # Drop missing values
    df = df.dropna(subset=[city_col, tools_col]).copy()

    # Lowercase the tool strings and split them into per-posting tool lists
    df[tools_col] = df[tools_col].str.lower()
    df['tool_list'] = df[tools_col].map(split_tools)

    # Explode tool list to individual rows (one row per posting/tool pair).
    # Postings whose list is empty become a single NaN row, which
    # value_counts() ignores below.
    df_exploded = df.explode('tool_list')

    # Filter for Sydney
    df_sydney = df_exploded[
        df_exploded[city_col].str.lower().str.contains('sydney')
    ]

    # Remove known programming languages (plus a few non-tool keywords)
    languages_to_exclude = {
        'python', 'r', 'go', 'java', 'c', 'c++', 'c#', 'scala', 'typescript',
        'javascript', 'sql', 'ruby', 'rust', 'kotlin', 'perl', 'php', 'swift',
        'matlab', 'dart', 'bash', 'powershell', 'haskell', 'react', 'agile',
        'node.js',
    }
    df_sydney = df_sydney[~df_sydney['tool_list'].isin(languages_to_exclude)]

    # Count tool frequency
    top_n = 10
    tool_counts = df_sydney['tool_list'].value_counts().head(top_n)

    if tool_counts.empty:
        # Plotting an empty Series raises, so report the situation instead.
        print('No tools found for Sydney; nothing to plot.')
    else:
        # Plot horizontal bar chart (ascending sort puts the top tool on top)
        plt.figure(figsize=(12, 8))
        tool_counts.sort_values().plot(
            kind='barh', color='#ff66b2', edgecolor='black'
        )

        plt.title(f'Top {top_n} Tools Used in Sydney Data Jobs ', fontsize=16)
        plt.xlabel('Number of Jobs')
        plt.ylabel('Tool')
        plt.grid(axis='x', linestyle='--', alpha=0.7)
        plt.tight_layout()
        plt.show()

# Plot chart for Rank of top Tools for Data Jobs in all cities in Australia:
# Tools across Australia
import pandas as pd


def split_tools(tools_str):
    """Split a comma-separated tool string into a clean list of tool names.

    Each name is lower-cased and stripped of surrounding whitespace. Empty
    entries (from stray or trailing commas) are dropped, and a tool that is
    repeated within the same string is kept only once, so that every job
    posting contributes at most one count per tool.

    Args:
        tools_str: Comma-separated tool names, e.g. "Excel, Tableau".
            Anything that is not a string yields an empty list.

    Returns:
        The unique tool names, in first-seen order.
    """
    if not isinstance(tools_str, str):
        return []
    names = (raw.strip().lower() for raw in tools_str.split(','))
    # dict.fromkeys de-duplicates while preserving insertion order.
    return list(dict.fromkeys(name for name in names if name))


# The script section is guarded (but kept at module level) so that running the
# cell still defines df, df_exploded, df_au, tool_counts, ... as globals,
# while split_tools() can be imported without a Colab runtime.
if __name__ == "__main__":
    import matplotlib.pyplot as plt
    from google.colab import files

    # Upload the CSV file
    uploaded = files.upload()

    # Load dataset (the uploaded file is expected to carry this exact name)
    df = pd.read_csv("extraction_checkpoint.csv")

    # Column names (adjust if needed)
    city_col = 'location'
    tools_col = 'extracted_tools_str'

    # Drop missing values
    df = df.dropna(subset=[city_col, tools_col]).copy()

    # Lowercase the tool strings and split them into per-posting tool lists
    df[tools_col] = df[tools_col].str.lower()
    df['tool_list'] = df[tools_col].map(split_tools)

    # Explode tool list to individual rows (one row per posting/tool pair).
    # Postings whose list is empty become a single NaN row, which
    # value_counts() ignores below.
    df_exploded = df.explode('tool_list')

    # Filter for Australian cities (basic keyword match, can be refined)
    df_au = df_exploded[df_exploded[city_col].str.lower().str.contains(
        'sydney|melbourne|brisbane|perth|adelaide|canberra|hobart|darwin'
    )]

    # Remove known programming languages (plus a few non-tool keywords)
    languages_to_exclude = {
        'python', 'r', 'go', 'java', 'c', 'c++', 'c#', 'scala', 'typescript',
        'javascript', 'sql', 'ruby', 'rust', 'kotlin', 'perl', 'php', 'swift',
        'matlab', 'dart', 'bash', 'powershell', 'haskell', 'react', 'agile',
        'node.js',
    }
    df_au = df_au[~df_au['tool_list'].isin(languages_to_exclude)]

    # Count tool frequency
    top_n = 10
    tool_counts = df_au['tool_list'].value_counts().head(top_n)

    if tool_counts.empty:
        # Plotting an empty Series raises, so report the situation instead.
        print('No tools found for Australian cities; nothing to plot.')
    else:
        # Plot horizontal bar chart (ascending sort puts the top tool on top)
        plt.figure(figsize=(12, 8))
        tool_counts.sort_values().plot(
            kind='barh', color='#ff66b2', edgecolor='black'
        )

        plt.title(
            f'Top {top_n} Tools Used in Data Jobs Across Australian Cities',
            fontsize=16,
        )
        plt.xlabel('Number of Jobs')
        plt.ylabel('Tool')
        plt.grid(axis='x', linestyle='--', alpha=0.7)
        plt.tight_layout()
        plt.show()