1import pandas as pd2import numpy as np3import matplotlib.pyplot as plt4import seaborn as sns5import re6from google.colab import files7
# Open the Colab upload dialog, then load the job listings.
# NOTE(review): the CSV path is fixed, so the uploaded file is presumably
# expected to be named "seek_jobs.csv" -- confirm.
uploaded = files.upload()
df = pd.read_csv("seek_jobs.csv")

# Merge every column whose name mentions "description" or "details" into one
# free-text field. Missing cells become empty strings, and every cell is cast
# to str so that a non-text column (e.g. numeric) cannot make ' '.join raise
# a TypeError.
text_cols = [col for col in df.columns if 'description' in col.lower() or 'details' in col.lower()]
df['combined_text'] = df[text_cols].fillna('').astype(str).agg(' '.join, axis=1)

14
# Compiled once at import time. Every pattern is applied to lower-cased text.
# The first pattern captures a range ("3-5 years", "3 to 5 years", also with
# an en/em dash); the remaining ones capture a single figure.
_EXPERIENCE_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r'(\d+)\s*(?:[-\u2013\u2014]+|to)\s*(\d+)\s+years?\s+(?:of\s+)?experience',
        r'(\d+)\+?\s+years?\s+(?:of\s+)?experience',
        r'more than (\d+)\s+years?\s+(?:of\s+)?experience',
        r'minimum of (\d+)\s+years?\s+(?:of\s+)?experience',
        r'at least (\d+)\s+years?\s+(?:of\s+)?experience',
        r'requires? (\d+)\s+years?\s+(?:of\s+)?experience',
    )
)

# Figures outside 1..40 years are discarded as implausible.
_MAX_PLAUSIBLE_YEARS = 40


def extract_experience(text):
    """Extract the lowest and highest years of experience mentioned in text.

    Args:
        text: Free-form job text. Anything that is not a ``str`` (for example
            ``None`` or NaN) is treated as containing no requirement.

    Returns:
        A two-element ``pd.Series`` ``[min_years, max_years]`` taken over all
        accepted matches, or ``[NaN, NaN]`` when nothing usable was found.
    """
    if not isinstance(text, str):
        return pd.Series([np.nan, np.nan])

    lowered = text.lower()
    lows, highs = [], []
    for pattern in _EXPERIENCE_PATTERNS:
        for match in pattern.findall(lowered):
            # findall yields a tuple for the two-group range pattern and a
            # plain string for the single-group patterns.
            if isinstance(match, tuple):
                # Sort so a reversed range ("5-3") cannot produce min > max.
                low, high = sorted((int(match[0]), int(match[1])))
            else:
                low = high = int(match)

            if 0 < low <= _MAX_PLAUSIBLE_YEARS and 0 < high <= _MAX_PLAUSIBLE_YEARS:
                lows.append(low)
                highs.append(high)

    if not lows:
        return pd.Series([np.nan, np.nan])

    return pd.Series([min(lows), max(highs)])

50
# Derive per-listing experience bounds from the combined text; listings with
# no recognisable requirement get NaN in both columns.
df[['min_experience', 'max_experience']] = df['combined_text'].apply(extract_experience)

# Keep only listings where an experience requirement was found. The explicit
# copy makes df_exp an independent frame, so adding columns to it later does
# not raise pandas' SettingWithCopyWarning.
df_exp = df.dropna(subset=['min_experience', 'max_experience']).copy()

# Min/Max experience by city (disabled: remove the triple quotes to run)
'''# avg_exp was never defined in the original script; a per-location mean of
# the two experience columns is assumed here -- confirm this is the intent.
avg_exp = df_exp.groupby('location')[['min_experience', 'max_experience']].mean()

plt.figure(figsize=(28, 10))
cities = avg_exp.index
plt.plot(cities, avg_exp['min_experience'], marker='o', label='Min Experience', color='#ffb6c1', linewidth=2)
plt.plot(cities, avg_exp['max_experience'], marker='o', label='Max Experience', color='#db7093', linewidth=2)

# Optional: fill between min and max for better visual
plt.fill_between(cities, avg_exp['min_experience'], avg_exp['max_experience'], color='#f8bbd0', alpha=0.3)

plt.title('Line Chart of Min and Max Years of Experience by City', fontsize=14, weight='bold')
plt.ylabel('Years of Experience', fontsize=12)
plt.xlabel('City', fontsize=12)
plt.xticks(rotation=45, ha='right', fontsize=10)
plt.yticks(fontsize=10)
plt.grid(True, linestyle='--', alpha=0.6)
plt.legend()
plt.tight_layout()
plt.show()'''

77
78
def bucket_exp(x):
    """Map a number of years of experience to a labelled range.

    Upper bounds are inclusive, so 3 falls in '0-3' and 10 falls in '7-10'.

    Args:
        x: Years of experience (int or float).

    Returns:
        One of '0-3', '3-5', '5-7', '7-10' or '10+'. A missing value (NaN or
        None) yields NaN instead of being labelled '10+'.
    """
    # NaN fails every "<=" comparison and would otherwise reach the final
    # branch, so missing values are handled first.
    if pd.isna(x):
        return np.nan
    if x <= 3:
        return '0-3'
    if x <= 5:
        return '3-5'
    if x <= 7:
        return '5-7'
    if x <= 10:
        return '7-10'
    return '10+'

# Label each listing by its minimum required experience. assign() builds a
# new frame instead of writing into the filtered one, which avoids pandas'
# SettingWithCopyWarning.
df_exp = df_exp.assign(exp_bucket=df_exp['min_experience'].apply(bucket_exp))

# Rows: location, columns: experience bucket, cells: number of listings.
bucket_counts = pd.crosstab(df_exp['location'], df_exp['exp_bucket'])

# Stacked experience range count per city with pinkish theme
# (disabled: remove the triple quotes to run)
'''bucket_counts.plot(
    kind='bar',
    stacked=True,
    figsize=(28, 10),  # Adjusted figsize
    # Pink shades: five entries, because bucket_exp can emit five labels.
    color=['#ffb6c1', '#ff69b4', '#db7093', '#c71585', '#8b0a50'],
    edgecolor='black'
)
plt.title('Stacked Chart of Experience Buckets by City', fontsize=14, weight='bold')
plt.ylabel('Job Count', fontsize=12)
plt.xlabel('City', fontsize=12)
plt.xticks(rotation=45, ha='right', fontsize=10, rotation_mode='anchor')  # Adjust rotation and alignment
plt.yticks(fontsize=10)
plt.legend(title='Experience Bucket')
plt.grid(axis='y', linestyle='--', alpha=0.6)
plt.tight_layout()
plt.show()'''

109
110
# Show the extracted experience range next to each job title.
print(df_exp.loc[:, ['job_title', 'min_experience', 'max_experience']])

# Collect every column whose name mentions "salary" (case-insensitive).
salary_cols = list(filter(lambda name: 'salary' in name.lower(), df.columns))
print("Salary columns found:", salary_cols)

# Use the first salary column when one exists, otherwise None.
salary_col = next(iter(salary_cols), None)

# Filter for 'Data Analyst' in Sydney (disabled: remove the triple quotes to run)
'''if salary_col:
    df_filtered = df_exp[
        df_exp['job_title'].str.lower().str.contains('data analyst', na=False) &
        df_exp['location'].str.lower().str.contains('sydney', na=False) &
        df_exp[salary_col].notnull()
    ]

    # Scatter plot: Salary vs Experience
    plt.figure(figsize=(10, 6))
    plt.scatter(df_filtered['min_experience'], df_filtered[salary_col], alpha=0.7, color='#db7093', edgecolor='black')
    plt.title('Salary vs. Years of Experience for Data Analysts in Sydney', fontsize=14, weight='bold')
    plt.xlabel('Years of Experience (Min)', fontsize=12)
    plt.ylabel('Salary', fontsize=12)
    plt.grid(True, linestyle='--', alpha=0.6)
    plt.tight_layout()
    plt.show()
else:
    print("No salary column found to plot.")'''
# To display a chart, remove the triple quotes (''') around the corresponding plotting block above.
You've completed this chapter! 🎉