# GCP Skills Boost Updates

from bs4 import BeautifulSoup
import requests
import shutil
import multiprocessing
import pandas as pd
import datetime
import os
import math
import json

## Get HTML elements

# Base URL of the site and URL of its catalog listing.
url_base = 'https://www.cloudskillsboost.google'
url_catalog = f'{url_base}/catalog'
# Scratch file that the scraping workers append JSON records to.
file_name = 'gcp_resources'

# requests has no default timeout, so set one to avoid hanging forever, and
# fail on HTTP errors instead of trying to parse an error page.
response = requests.get(url_catalog, timeout=30)
response.raise_for_status()
html = response.text
soup = BeautifulSoup(html, 'html.parser')

# The first 'ql-body-2' paragraph is expected to start with the total number
# of resources (its first whitespace-separated token is parsed as an int).
counter_tag = soup.find('p', attrs={'class': 'ql-body-2'})
counter_tokens = (counter_tag.string or '').split() if counter_tag else []
if not counter_tokens:
    raise RuntimeError(f'Resource counter not found in {url_catalog}')
num_resources = int(counter_tokens[0])
# Assumes the catalog lists 10 resources per page.
num_pages = math.ceil(num_resources / 10)

print(f'{num_resources} resources')
def get_urls():
    """Yield the URL of each catalog page, for pages 1 through ``num_pages``."""
    page_numbers = range(1, num_pages + 1)
    yield from (f'{url_catalog}?page={page}' for page in page_numbers)
        
def get_format(element):
    """Return the ``data-type`` attribute of the item's ``div > h3 > a`` anchor."""
    anchor = element.div.h3.a
    return anchor['data-type']

def get_name(element):
    """Return the text of the item's ``div > h3 > a`` anchor, unmodified."""
    anchor = element.div.h3.a
    return anchor.text

def get_link(element):
    """Build the absolute URL of a catalog item from its anchor's ``href``.

    The query string of the ``href`` is discarded. Paths that start with
    ``focuses`` get ``?parent=catalog`` appended to the resulting URL.
    """
    href = element.div.h3.a['href']
    # Keep what precedes the first '?', then drop the first character
    # (presumably the leading '/' of a site-relative path).
    resource_path = href.partition('?')[0][1:]
    suffix = '?parent=catalog' if resource_path.startswith('focuses') else ''
    return f'{url_base}/{resource_path}{suffix}'

def get_description(element):
    """Return the text of the item's first ``<p>``, stripped of outer whitespace."""
    paragraph = element.p
    return paragraph.text.strip()

def get_level(element):
    """Return the stripped text of the footer's ``catalog-item-level`` div.

    An empty string is returned when the footer has no such div.
    """
    level_tag = element.footer.find('div', 'catalog-item-level')
    if not level_tag:
        return ''
    return level_tag.text.strip()

def get_credits(element):
    """Return the credit cost from the footer's ``catalog-item-cost`` div.

    Returns:
        ``0`` when the stripped text is exactly ``'Free'``; otherwise the
        first whitespace-separated token of the text parsed as an int.
        ``None`` when the div is missing or has no text, so that an item
        without a cost does not abort the whole scrape (``get_level``
        tolerates a missing tag in the same way).

    Raises:
        ValueError: if the first token is not an integer.
    """
    tag = element.footer.find('div', 'catalog-item-cost')
    if tag is None:
        return None

    tag_text = tag.text.strip()
    if tag_text == 'Free':
        return 0

    tokens = tag_text.split()
    if not tokens:
        return None
    return int(tokens[0])

# NOTE: get_duration is not yet part of the scraped record.
# Minutes per unit, matched case-insensitively on the unit's prefix so that
# singular/plural and capitalised spellings ("Hours", "hour", "day") all work.
_MINUTES_PER_UNIT = (('min', 1), ('hour', 60), ('day', 1440))


def _duration_to_minutes(amount, unit):
    """Convert one ``<amount> <unit>`` pair to minutes, or ``None`` if unparseable."""
    try:
        value = int(amount)
    except ValueError:
        return None

    unit = unit.lower()
    for prefix, factor in _MINUTES_PER_UNIT:
        if unit.startswith(prefix):
            return value * factor
    return None


def get_duration(element):
    """Return the duration from the footer's ``catalog-item-duration`` div, in minutes.

    The div's text is read as alternating ``<amount> <unit>`` tokens, e.g.
    ``'45 Minutes'``, ``'2 Hours'``, ``'1 day'`` or ``'1 hour 30 minutes'``;
    the pairs are converted to minutes and summed.

    Returns:
        The total number of minutes as an int, or ``None`` when the div is
        missing, empty, or not in the expected format.
    """
    duration_tag = element.footer.find('div', 'catalog-item-duration')
    if not duration_tag:
        return None

    tokens = duration_tag.text.split()
    # Amounts and units must come in pairs.
    if not tokens or len(tokens) % 2:
        return None

    total_minutes = 0
    for amount, unit in zip(tokens[::2], tokens[1::2]):
        minutes = _duration_to_minutes(amount, unit)
        if minutes is None:
            return None
        total_minutes += minutes
    return total_minutes
def get_elements(url):
    """Scrape one catalog page and append its items to the scratch file.

    Every ``li.catalog__list__item`` on the page becomes one JSON object
    followed by a comma, appended to ``file_name``.

    Args:
        url: URL of the catalog page to fetch.

    Raises:
        requests.RequestException: on timeout, connection failure or a
            non-success HTTP status.
    """
    # requests has no default timeout; fail on HTTP errors rather than
    # silently scraping zero items from an error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    # Name the parser explicitly so the result does not depend on which
    # parsers happen to be installed.
    soup = BeautifulSoup(response.text, 'html.parser')
    elements = soup.find_all('li', 'catalog__list__item')

    records = [
        json.dumps({
            'format':       get_format(element),
            'name':         get_name(element),
            'link':         get_link(element),
            'description':  get_description(element),
            'credits':      get_credits(element),
            'level':        get_level(element),
        }) + ','
        for element in elements
    ]
    if not records:
        return

    # Open the file once per page and write the page's records in one call
    # instead of reopening the file for every item.
    with open(file_name, 'a') as f:
        f.write(''.join(records))

# Scrape all catalog pages in parallel, using one worker process per CPU.
worker_count = multiprocessing.cpu_count()
with multiprocessing.Pool(processes=worker_count) as p:
    p.map(get_elements, get_urls())

## Load data

# Read everything the workers appended, then delete the scratch file.
with open(file_name) as scratch:
    raw_data = scratch.read()
os.remove(file_name)

# Each record is followed by a comma: drop the last character and wrap the
# rest in brackets to obtain a JSON array.
data = json.loads('[' + raw_data[:-1] + ']')
actual_data = pd.DataFrame(data)
print(f'{len(actual_data)} resources')

# The number of scraped rows must match the count announced by the catalog.
assert len(actual_data) == num_resources

# Previous checkpoint, used as the baseline for the comparison.
last_data_file = '../data/gcp_resources_chkp_230620.json'
last_data = pd.read_json(last_data_file, orient='split')

## Preprocessing

# Format and name of fully duplicated rows (displayed when run as a notebook cell).
duplicated_rows = actual_data.duplicated()
actual_data.loc[duplicated_rows, ['format', 'name']]
actual_data = actual_data.drop_duplicates()

print(f'{len(actual_data)} resources')

# Drop resources whose name ends with 'Locales', then order the rows.
locale_rows = actual_data['name'].str.endswith('Locales')
actual_data = actual_data[~locale_rows].sort_values(by=['format', 'name', 'link'])

print(f'{len(actual_data)} resources')

## Save actual data

# Two-digit year, month and day of the current local date, e.g. '230620'.
timestamp = f'{datetime.datetime.now():%y%m%d}'

# Keep a copy of the previous checkpoint next to it, with a '_tmp' suffix.
backup_file = f'{last_data_file}_tmp'
shutil.copy(last_data_file, backup_file)

# Write the current snapshot as a new dated checkpoint.
new_file_name = f'{file_name}_chkp_{timestamp}.json'
new_file_path = f'../data/{new_file_name}'
actual_data.to_json(new_file_path, indent=4, orient='split', index=False)
print(f'{new_file_name} created!')

## Comparison

# Tag each row with the snapshot it comes from, then stack both snapshots.
last_data['df'] = 'last'
actual_data['df'] = 'actual'
updates = pd.concat([last_data, actual_data])
updates['status'] = 'same'

# True where a (format, name) pair / a link occurs more than once in the
# stacked frame.
same_names = updates.duplicated(subset=['format', 'name'], keep=False)
same_links = updates.duplicated(subset=['link'], keep=False)
last_df = updates['df'] == 'last'
actual_df = updates['df'] == 'actual'

# Applied in order: a later rule overrides an earlier one on the rows it matches.
status_rules = [
    (last_df & ~same_links, 'removed'),
    (actual_df & ~same_links, 'new'),
    (actual_df & ~same_links & same_names, 'new_same_name'),
    (last_df & same_links & ~same_names, 'old_name'),
    (actual_df & same_links & ~same_names, 'new_name'),
]
for mask, label in status_rules:
    updates.loc[mask, 'status'] = label

# Keep only the rows whose status changed.
changed = updates['status'] != 'same'
updates = updates[changed].sort_values(by=['format', 'status', 'name'])

cols = ['format', 'name', 'status', 'link']


def _as_anchor(url):
    """Wrap a URL in an HTML anchor that opens in a new tab."""
    return f'<a href="{url}" target="_blank">link</a>'


updates['link'] = updates['link'].apply(_as_anchor)

# Styled table (rendered when run as a notebook cell).
updates[cols].style.format()
# Remove temporary Trivia resources.
has_trivia = updates['name'].str.contains('Trivia')
updates = updates[~has_trivia]
# Copy the markdown table to the clipboard so it can be added to the Changelog.
markdown_table = updates[cols].to_markdown(index=False)
pd.io.clipboards.to_clipboard(markdown_table, excel=False)
print(f'timestamp: {timestamp}')

## Changelog

### 230620

| format | name | status | link |
|:--|:--|:--|:--|
| Course | Developing Applications with Cloud Run on Google Cloud: Fundamentals | new | link |
| Course | MySQL to Cloud Spanner | new | link |
| Lab | Enhancing Network Security Approach on Google Cloud | new | link |
| Lab | Managing Google Classroom | new | link |
| Lab | Networking Fundamentals on Google Cloud: Challenge Lab | new | link |
| Lab | The Basics of Google Cloud Compute: Challenge Lab | new | link |
| Lab | Use APIs to Work with Cloud Storage: Challenge Lab | new | link |
| Lab | Using the Google Cloud Speech API: Challenge Lab | new | link |
| Lab | Build and Deploy Machine Learning Solutions with Vertex AI: Challenge Lab | removed | link |
| Lab | Google Workspace Admin: Managing Google Meet | removed | link |
| Lab | Scaling VM-Series to Secure Google Cloud Networks | removed | link |
| Lab | Using Cloud Trace on Kubernetes Engine | removed | link |
| Quest | Networking Fundamentals on Google Cloud | new | link |
| Quest | The Basics of Google Cloud Compute | new | link |
| Quest | Use APIs to Work with Cloud Storage | new | link |
| Quest | Using the Google Cloud Speech API | new | link |

### 230607

| format | name | status | link |
|:--|:--|:--|:--|
| Course | Generative AI Fundamentals | new | link |
| Lab | API Gateway: Qwik Start | new | link |
| Lab | Configure Your Workplace: Google Workspace for IT Admins: Challenge Lab | new | link |
| Lab | Google Workspace Admin: Getting Started | new | link |
| Lab | Google Workspace Admin: Managing Applications | new | link |
| Lab | Google Workspace Admin: Managing Google Meet | new | link |
| Lab | Google Workspace Admin: Provisioning | new | link |
| Lab | Google Workspace Admin: Securing | new | link |
| Lab | Google Workspace Admin: Super Admin Account Recovery | new | link |
| Lab | Google Workspace for Education: Challenge Lab | new | link |
| Lab | Google Workspace for Education: Getting Started | new | link |
| Lab | Google Workspace for Education: Managing Services | new | link |
| Lab | Setting Up Google Meet for Distance Learning | new | link |
| Lab | Shared Drives: Getting Started | new | link |
| Lab | Teaching with Google Classroom | new | link |
| Lab | HTTPS Content-Based Load Balancer with Terraform | removed | link |
| Quest | Getting Started with Apache Beam | new_name | link |
| Quest | Get Started with Apache Beam | old_name | link |

### 230604

| format | name | status | link |
|:--|:--|:--|:--|
| Course | Introduction to Generative AI Studio | new | link |
| Course | Introduction to Responsible AI | new | link |
| Lab | Analyze Images with the Cloud Vision API: Challenge Lab | new | link |
| Lab | Offloading Financial Mainframe Data into BigQuery and Elastic Search | new | link |
| Lab | Protect Sensitive Data with Data Loss Prevention: Challenge Lab | new | link |
| Lab | Serverless Data Processing with Dataflow - Advanced Streaming Analytics Pipeline with Cloud Dataflow (Java) | new | link |
| Lab | Serverless Data Processing with Dataflow - Testing with Apache Beam (Java) | new | link |
| Lab | Serverless Data Processing with Dataflow - Writing an ETL Pipeline using Apache Beam and Cloud Dataflow (Python) | new | link |
| Lab | Serverless Data Processing with Dataflow - Writing an ETL pipeline using Apache Beam and Cloud Dataflow (Java) | new | link |
| Lab | Install and Use Cloud Tools for PowerShell | removed | link |
| Lab | VM Migration: Introduction to StratoZone Migrate | removed | link |
| Quest | Analyze Images with the Cloud Vision API | new | link |
| Quest | Get Started with Apache Beam | new | link |
| Quest | Protect Sensitive Data with Data Loss Prevention | new | link |
| Quest | Measure Site Reliability using Cloud Operations Suite | removed | link |

### 230528

| format | name | status | link |
|:--|:--|:--|:--|
| Course | Media Rendering with Google Cloud | new | link |
| Course | Launching into Machine Learning | new_same_name | link |
| Lab | Secure BigLake Data: Challenge Lab | new | link |
| Lab | Tag and Discover BigLake Data: Challenge Lab | new | link |
| Lab | API Gateway: Qwik Start | removed | link |
| Quest | Getting Started with MongoDB Atlas on Google Cloud | new | link |
| Quest | Secure BigLake Data | new | link |
| Quest | Tag and Discover BigLake Data | new | link |

### 230522

| format | name | status | link |
|:--|:--|:--|:--|
| Lab | Analyze Speech & Language with Google APIs: Challenge Lab | new | link |
| Lab | Create a Secure Data Lake on Cloud Storage: Challenge Lab | new | link |
| Lab | Monitoring in Google Cloud: Challenge Lab | new | link |
| Lab | Building an Application with MongoDB Atlas and Natural Language API hosted on Cloud Run | new_name | link |
| Lab | MongoDB Atlas with Natural Language API and Cloud Run | old_name | link |
| Quest | Analyze Speech and Language with Google APIs | new | link |
| Quest | Create a Secure Data Lake on Cloud Storage | new | link |
| Quest | Monitoring in Google Cloud | new | link |

### 230518

| format | name | status | link |
|:--|:--|:--|:--|
| Course | Discovery AI | new_name | link |
| Course | Product Discovery | old_name | link |
| Course | Migrating to Google Cloud | removed | link |
| Lab | Get Started with Generative AI Studio | new | link |
| Lab | Hosting a Web App on Google Cloud Using Compute Engine - Azure | new | link |
| Lab | Enhancing Network Security Approach on Google Cloud | removed | link |
| Lab | Measure Site Reliability using Cloud Operations Suite: Challenge Lab | removed | link |

### 230517

| format | name | status | link |
|:--|:--|:--|:--|
| Course | Create Image Captioning Models | new | link |
| Course | Encoder-Decoder Architecture | new | link |
| Course | Introduction to Image Generation | new | link |
| Course | Preparing for Your Google Workspace Administrator Journey | new | link |
| Lab | Qwiklabs Trivia May Week 3 | new | link |
| Lab | SAP Landing Zone: Add and Configure Storage to SAP VMs | new | link |
| Lab | SAP Landing Zone: Plan and Deploy SAP Virtual Machines | new | link |
| Lab | SAP Landing Zone: Plan and Deploy the SAP Network | new | link |
| Lab | Qwiklabs Trivia May Week 2 | removed | link |
| Quest | Build an SAP Landing Zone on Google Cloud | new | link |
| Quest | Google Cloud’s Operations Suite on GKE | removed | link |

### 230515

| format | name | status | link |
|:--|:--|:--|:--|
| Lab | Generative AI with Vertex AI: Getting Started | new | link |
| Lab | Generative AI with Vertex AI: Prompt Design | new | link |
| Quest | Generative AI Explorer - Vertex AI | new | link |

### 230513

| format | name | status | link |
|:--|:--|:--|:--|
| Lab | App Engine: 3 Ways: Challenge Lab | new | link |
| Lab | Get Started with Eventarc: Challenge Lab | new | link |
| Lab | Get Started with Pub/Sub: Challenge Lab | new | link |
| Quest | App Engine: 3 Ways | new | link |
| Quest | Get Started with Eventarc | new | link |
| Quest | Get Started with Pub/Sub | new | link |

### 230324 - 230510

format

name

status

link

Course

Attention Mechanism

new

link

Course

Building Applications with Eventarc on Google Cloud

new

link

Course

Design Foundations for Streaming with Google Cloud

new

link

Course

Developing Containerized Applications on Google Cloud

new

link

Course

Introduction to Generative AI

new

link

Course

Introduction to Large Language Models

new

link

Course

Serving Multimedia Content with Google Cloud

new

link

Course

Transformer Models and BERT Model

new

link

Course

Deploy and Monitor in Google Cloud for AWS Professionals

new_name

link

Course

Deploy and Monitor in Google Cloud for Azure Professionals

new_name

link

Course

Google Cloud Compute and Scalability for AWS Professionals

new_name

link

Course

Google Cloud Compute and Scalability for Azure Professionals

new_name

link

Course

Google Cloud IAM and Networking for AWS Professionals

new_name

link

Course

Google Cloud IAM and Networking for Azure Professionals

new_name

link

Course

Google Cloud Storage and Containers for AWS Professionals

new_name

link

Course

Google Cloud Storage and Containers for Azure Professionals

new_name

link

Course

Machine Learning Operations (MLOps): Getting Started

new_name

link

Course

MLOps (Machine Learning Operations) Fundamentals

old_name

link