Skip to main content

Job Board Scraping

Build a job board monitoring system that tracks new postings and sends alerts.

Use case

Monitor job boards for new positions matching your criteria, alert candidates or recruiters, and track hiring trends.

What you’ll build

  • Strategy for job listing extraction
  • Regular monitoring (every 30 minutes)
  • New job detection
  • Alert system for matching positions

Prerequisites

  • Meter API key (the examples read it from the METER_API_KEY environment variable)
  • TODO: Alerting system (email, Slack, Discord, etc.)
  • TODO: Target job board URLs

Step 1: Generate strategy

TODO: Replace with your job board
from meter_sdk import MeterClient
import os

# Build the SDK client; the key comes from the METER_API_KEY environment
# variable (None is passed through if the variable is unset).
client = MeterClient(api_key=os.environ.get("METER_API_KEY"))

# Generate the extraction strategy for the job board search page
strategy = client.generate_strategy(
    url="https://jobs.example.com/search?q=software+engineer",  # TODO: Your URL
    description=(
        "Extract job title, company name, location, and job URL. "
        "Ignore sponsored ads."
    ),
    name="Software Engineer Jobs Monitor",
)

# Keep the strategy ID around; the schedule is created from it.
strategy_id = strategy["strategy_id"]
print("Strategy ID: {}".format(strategy_id))

# Show at most the first five extracted postings
print("\nPreview of current postings:")
for posting in strategy["preview_data"][:5]:
    print("  {}".format(posting))

Step 2: Set up frequent monitoring

# Re-run the strategy against the search page on a 30-minute cadence
schedule = client.create_schedule(
    strategy_id=strategy_id,
    url="https://jobs.example.com/search?q=software+engineer",  # TODO: Your URL
    interval_seconds=30 * 60,  # 1800 seconds
)

print("Schedule ID: {}".format(schedule["id"]))

Step 3: Detect and alert on new jobs

TODO: Implement job tracking and alerts
def track_new_jobs(schedule_id, job_database_file="jobs.json"):
    """Detect jobs that have not been seen before and report them.

    Loads the known job IDs from ``job_database_file``, fetches the pending
    changes for ``schedule_id`` from the module-level ``client``, prints every
    job whose ID is not yet known, and writes the enlarged ID set back to the
    file.

    Args:
        schedule_id: ID of the schedule whose changes are fetched.
        job_database_file: Path of the JSON file holding the known job IDs
            (a JSON array of strings). A missing file is treated as empty.

    Returns:
        list: The job records that were not known before (empty if none).
    """
    import json

    # Load known jobs; a missing file simply means nothing is known yet.
    try:
        with open(job_database_file, 'r', encoding='utf-8') as f:
            known_jobs = set(json.load(f))
    except FileNotFoundError:
        known_jobs = set()

    # Check for changes.
    # NOTE(review): mark_seen=True presumably stops the same changes from
    # being returned again -- confirm against the Meter SDK docs.
    changes = client.get_schedule_changes(schedule_id, mark_seen=True)

    new_jobs = []

    if changes['count'] > 0:
        for change in changes['changes']:
            for job in change['results']:
                # TODO: Create unique job ID (depends on your data)
                # job_id = f"{job['company']}_{job['title']}"
                # Placeholder: canonical JSON with sorted keys, so the same
                # record yields the same ID even if its keys arrive in a
                # different order (plain str(job) does not guarantee that).
                # IDs written by an older str(job)-based run will not match
                # and those jobs are reported once more.
                job_id = json.dumps(job, sort_keys=True, default=str)

                if job_id not in known_jobs:
                    new_jobs.append(job)
                    known_jobs.add(job_id)

    if not new_jobs:
        # Nothing changed, so there is no reason to rewrite the database.
        return new_jobs

    print(f"Found {len(new_jobs)} new jobs!")

    for job in new_jobs:
        # TODO: Send alert
        # send_job_alert(job)
        print(f"  NEW: {job}")

    # Save updated job database
    with open(job_database_file, 'w', encoding='utf-8') as f:
        json.dump(list(known_jobs), f)

    return new_jobs

# Monitor continuously
import time

# Poll forever: run one tracking pass, report an empty pass, then sleep.
while True:
    new_jobs = track_new_jobs(schedule['id'])

    if not new_jobs:
        print("No new jobs")

    time.sleep(30 * 60)  # Wait 30 minutes

Step 4: Filter and match criteria

TODO: Implement job filtering
def filter_jobs(jobs, criteria):
    """Return the jobs that satisfy the given criteria, in input order.

    All comparisons are case-insensitive substring tests.

    Args:
        jobs: Iterable of job records. Assumes each record is a mapping with
            optional 'title' and 'location' entries -- adjust the key names
            to whatever your extraction strategy produces.
        criteria: Mapping with optional entries:
            'keywords': at least one must occur in the job title
                (missing or empty means no keyword constraint);
            'location': must occur in the job location
                (missing or empty means any location);
            'exclude': terms that must not occur in the job title.

    Returns:
        list: The matching job records.
    """
    # Normalise the criteria once instead of once per job.
    keywords = [kw.lower() for kw in (criteria.get('keywords') or [])]
    wanted_location = (criteria.get('location') or '').lower()
    excluded_terms = [term.lower() for term in (criteria.get('exclude') or [])]

    matched = []

    for job in jobs:
        # `or ''` also covers records where the field is present but None.
        title = str(job.get('title') or '').lower()
        location = str(job.get('location') or '').lower()

        if keywords and not any(kw in title for kw in keywords):
            continue
        # An empty wanted_location is a substring of everything, so it
        # never filters anything out.
        if wanted_location not in location:
            continue
        if any(term in title for term in excluded_terms):
            continue

        matched.append(job)

    return matched

# Matching criteria handed to filter_jobs()
CRITERIA = {
    # Terms to look for in a posting
    "keywords": [
        "python",
        "backend",
        "api",
    ],
    # Desired location
    "location": "remote",
    # Terms that rule a posting out
    "exclude": [
        "senior",
        "lead",
    ],
}

# Filter new jobs
# The schedule ID is read from the `schedule` object created in Step 2; no
# module-level `schedule_id` variable exists (only `strategy_id`), so using
# that name here would raise NameError.
new_jobs = track_new_jobs(schedule['id'])
matched_jobs = filter_jobs(new_jobs, CRITERIA)

if matched_jobs:
    print(f"Found {len(matched_jobs)} matching jobs!")
    # send_alert(matched_jobs)

Complete example

TODO: Full implementation
# job_monitor.py

from meter_sdk import MeterClient
import os
import time
import json
from datetime import datetime

# SDK client shared by the whole script; the API key is taken from the
# METER_API_KEY environment variable (None is passed through if unset).
client = MeterClient(api_key=os.environ.get("METER_API_KEY"))

# Configuration
CONFIG = {
    # ID of the schedule whose changes monitor() polls
    "schedule_id": "your-schedule-id",  # TODO: Replace
    # Seconds to sleep between polls
    "check_interval": 1800,  # 30 minutes
    # Matching rules handed to matches_criteria()
    "criteria": {
        "keywords": ["python", "backend"],
        "location": "remote",
        "exclude": ["senior", "lead", "staff"],
    },
    # Placeholder recipient address for alerts
    "alert_email": "you@example.com",  # TODO: Replace
}

# JSON file in which the known job IDs are persisted between runs
JOB_DB_FILE = "known_jobs.json"

def load_known_jobs():
    """Return the set of job IDs stored in JOB_DB_FILE.

    A missing file yields an empty set.
    """
    try:
        db_file = open(JOB_DB_FILE, 'r')
    except FileNotFoundError:
        # Nothing has been stored yet.
        return set()

    with db_file:
        stored_ids = json.load(db_file)
    return set(stored_ids)

def save_known_jobs(known_jobs):
    """Write the known job IDs to JOB_DB_FILE as an indented JSON array."""
    with open(JOB_DB_FILE, 'w') as db_file:
        # A set is not JSON-serialisable, hence the list conversion.
        db_file.write(json.dumps(list(known_jobs), indent=2))

def create_job_id(job):
    """Create a stable identifier for a job record.

    The ID is the SHA-256 hex digest of the record's canonical JSON form
    (keys sorted, values that JSON cannot encode rendered with ``str``).
    Unlike the built-in ``hash()``, whose result for strings is salted per
    interpreter process, this value is identical across runs, so IDs saved
    to disk keep matching after a restart. It is also independent of the
    order in which the record's keys arrive.

    Args:
        job: The job record; typically a dict of extracted fields.

    Returns:
        str: A 64-character hexadecimal digest.
    """
    import hashlib

    # TODO: Create stable ID based on your data
    # return f"{job['company']}_{job['title']}_{job.get('location', '')}"
    # Placeholder: digest of the whole record.
    canonical = json.dumps(job, sort_keys=True, default=str)
    return hashlib.sha256(canonical.encode("utf-8")).hexdigest()

def matches_criteria(job, criteria):
    """Check whether a job record satisfies the matching criteria.

    All comparisons are case-insensitive substring tests.

    Args:
        job: The job record. Assumes a mapping with optional 'title' and
            'location' entries -- adjust the key names to whatever your
            extraction strategy produces.
        criteria: Mapping with optional entries:
            'keywords': at least one must occur in the title
                (missing or empty means no keyword constraint);
            'location': must occur in the job location
                (missing or empty means any location);
            'exclude': terms that must not occur in the title.

    Returns:
        bool: True if the job passes every check, otherwise False.
    """
    # `or ''` also covers records where the field is present but None.
    title = str(job.get('title') or '').lower()
    location = str(job.get('location') or '').lower()

    keywords = criteria.get('keywords') or []
    wanted_location = (criteria.get('location') or '').lower()
    excluded_terms = criteria.get('exclude') or []

    # Keywords match
    if keywords and not any(kw.lower() in title for kw in keywords):
        return False

    # Location match (an empty wanted_location is contained in any string)
    if wanted_location not in location:
        return False

    # Exclude terms
    return not any(term.lower() in title for term in excluded_terms)

def send_alert(jobs):
    """Announce the new matching jobs.

    Currently prints a headline with the number of jobs followed by one
    line per job.
    """
    # TODO: Implement alerting (email, Slack, etc.)
    headline = f"\n🚨 ALERT: {len(jobs)} new matching jobs!"
    print(headline)
    for posting in jobs:
        print(f"  - {posting}")

def monitor():
    """Main monitoring loop.

    Loads the known job IDs once, then repeats forever: fetch the pending
    changes for CONFIG['schedule_id'], record the ID of every job not seen
    before, alert on those that match CONFIG['criteria'], persist the ID
    set whenever it grew, and sleep for CONFIG['check_interval'] seconds.
    """
    known_jobs = load_known_jobs()

    while True:
        print(f"\n[{datetime.now()}] Checking for new jobs...")

        # NOTE(review): mark_seen=True presumably stops the same changes
        # from being returned again -- confirm against the Meter SDK docs.
        changes = client.get_schedule_changes(
            CONFIG['schedule_id'],
            mark_seen=True
        )

        if changes['count'] == 0:
            print("No new job postings")
        else:
            print(f"Processing {changes['count']} updates")

            new_matching_jobs = []
            known_before = len(known_jobs)

            for change in changes['changes']:
                for job in change['results']:
                    job_id = create_job_id(job)

                    if job_id in known_jobs:
                        continue

                    # Remember every new posting, matching or not, so it is
                    # evaluated only once.
                    known_jobs.add(job_id)

                    if matches_criteria(job, CONFIG['criteria']):
                        new_matching_jobs.append(job)

            if new_matching_jobs:
                send_alert(new_matching_jobs)
            else:
                print("No new jobs matching criteria")

            # Persist whenever new IDs were recorded, not only when something
            # matched; otherwise IDs of non-matching postings would be lost
            # on restart.
            if len(known_jobs) > known_before:
                save_known_jobs(known_jobs)

        print(f"Waiting {CONFIG['check_interval']} seconds...")
        time.sleep(CONFIG['check_interval'])


if __name__ == "__main__":
    monitor()

Multi-board monitoring

TODO: Monitor multiple job boards
# Boards to monitor. Each entry has a display name (printed by
# monitor_all_boards), a schedule ID, and a criteria slot.
# The schedule IDs shown are placeholders.
JOB_BOARDS = [
    {
        "name": "Indeed",
        "schedule_id": "schedule-1",
        # Placeholder: in Python `{...}` is a set containing Ellipsis, not a
        # dict -- replace it with a real criteria mapping before use.
        "criteria": {...}
    },
    {
        "name": "LinkedIn",
        "schedule_id": "schedule-2",
        # Placeholder, see above.
        "criteria": {...}
    },
    # TODO: Add more boards
]

def monitor_all_boards():
    """Walk through JOB_BOARDS and announce each board being checked.

    The actual per-board check is still a stub.
    """
    for entry in JOB_BOARDS:
        board_name = entry['name']
        print(f"\nChecking {board_name}...")
        # Check for new jobs...

Deployment

TODO: Deploy your monitor
  • Run as systemd service
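  • Use a process manager such as PM2 (despite its Node.js origins it can run Python scripts, e.g. `pm2 start job_monitor.py --interpreter python3`)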
  • Deploy to cloud (AWS, Heroku, etc.)
  • Set up cron job

Advanced features

TODO: Extend functionality
  • Salary range tracking
  • Company research integration
  • Application tracking
  • Trend analysis (hiring surges, salary trends)

See also

Need help?

Email me at [email protected]