GCP Skills Boost Updates
Contents
GCP Skills Boost Updates#
from bs4 import BeautifulSoup
import requests
import shutil
import multiprocessing
import pandas as pd
import datetime
import os
import math
import json
Get HTML elements#
# Scrape configuration: catalog root URL and the temp file that the
# worker processes append scraped resources to.
url_base = 'https://www.cloudskillsboost.google'
url_catalog = f'{url_base}/catalog'
file_name = 'gcp_resources'
# The catalog page reports the total resource count in a 'ql-body-2'
# paragraph whose first token is the number (e.g. '1234 resources').
html = requests.get(url_catalog).text
soup = BeautifulSoup(html, 'html.parser')
num_resources = int(soup.find('p', attrs={'class': 'ql-body-2'}).string.split()[0])
num_pages = math.ceil(num_resources/10)  # catalog lists 10 resources per page
print(f'{num_resources} resources')
def get_urls():
    """Yield the URL of every catalog page (page numbers start at 1)."""
    for page in range(1, num_pages + 1):
        yield f'{url_catalog}?page={page}'
def get_format(element):
    """Return the resource type stored in the card title's 'data-type' attribute."""
    title_anchor = element.div.h3.a
    return title_anchor['data-type']
def get_name(element):
    """Return the resource title shown on the catalog card."""
    title_anchor = element.div.h3.a
    return title_anchor.text
def get_link(element):
    """Build the absolute URL for a catalog entry.

    Labs ('focuses/...' paths) get a '?parent=catalog' query string
    appended, matching how the catalog itself links them.
    """
    # Drop the query string and the leading '/' from the relative href.
    # (Renamed from 'id' to avoid shadowing the builtin.)
    resource_path = element.div.h3.a['href'].split('?')[0][1:]
    link = f'{url_base}/{resource_path}'
    if resource_path.startswith('focuses'):
        link += '?parent=catalog'
    return link
def get_description(element):
    """Return the card's description paragraph with surrounding whitespace removed."""
    description = element.p.text
    return description.strip()
def get_level(element):
    """Return the difficulty label from the card footer, or '' if absent.

    Keeps the truthiness check (not an 'is None' test) because empty
    bs4 tags are falsy as well.
    """
    level_tag = element.footer.find('div', 'catalog-item-level')
    if not level_tag:
        return ''
    return level_tag.text.strip()
def get_credits(element):
    """Return the cost in credits; 'Free' or a missing cost tag maps to 0.

    Previously raised AttributeError when the footer had no cost div;
    now guarded the same way as get_level.
    """
    cost_tag = element.footer.find('div', 'catalog-item-cost')
    if not cost_tag:
        return 0
    cost_text = cost_tag.text.strip()
    if cost_text == 'Free':
        return 0
    # Cost text looks like '5 Credits'; the leading token is the amount.
    return int(cost_text.split()[0])
def get_duration(element):
    """Return the resource duration in minutes, or None when unavailable.

    Footer text is either '<h> hour(s) <m> minute(s)' (4 tokens) or
    '<n> <unit>' (2 tokens); 'Hours' and 'day' units are converted to
    minutes. Fixes the original draft, which compared the raw string to
    4/2 instead of splitting it, and never returned the result.
    """
    duration = None
    duration_tag = element.footer.find('div', 'catalog-item-duration')
    if duration_tag:
        tokens = duration_tag.text.strip().split()
        if len(tokens) == 4:
            # e.g. '1 hour 30 minutes' -> 90
            duration = int(tokens[0]) * 60 + int(tokens[2])
        elif len(tokens) == 2:
            duration_str, unit = tokens
            duration = int(duration_str)
            if unit == 'Hours':
                duration *= 60
            elif unit == 'day':
                duration *= 1440
    return duration
def get_elements(url):
    """Scrape one catalog page and append each resource as JSON to file_name.

    Runs inside a multiprocessing pool; each worker opens the shared file
    in append mode per element. NOTE(review): concurrent appends assume
    small writes don't interleave — confirm acceptable for this use.
    """
    html = requests.get(url).text
    # Name the parser explicitly (matching the initial catalog request at
    # module top) so bs4 doesn't pick a different installed parser per env.
    soup = BeautifulSoup(html, 'html.parser')
    elements = soup.find_all('li', 'catalog__list__item')
    for element in elements:
        data = {
            'format': get_format(element),
            'name': get_name(element),
            'link': get_link(element),
            'description': get_description(element),
            'credits': get_credits(element),
            'level': get_level(element)
        }
        with open(file_name, 'a') as f:
            f.write(json.dumps(data) + ',')
# Fan the per-page scraping out across all cores; each worker appends
# its rows to file_name. NOTE(review): no __main__ guard — fine in a
# notebook, but would re-execute the module under Windows/spawn start.
with multiprocessing.Pool(multiprocessing.cpu_count()) as p:
    p.map(get_elements, get_urls())
Load data#
# Re-read the workers' output: a comma-separated stream of JSON objects.
with open(file_name) as f:
    raw_data = f.read()
os.remove(file_name)
# Wrap in brackets and drop the trailing comma to form a valid JSON array.
data = json.loads(f'[{raw_data[:-1]}]')
actual_data = pd.DataFrame(data)
print(f'{len(actual_data)} resources')
# Sanity check: scraped exactly as many resources as the catalog reported.
assert num_resources == len(actual_data)
# Previous checkpoint to diff against; 'split' orient matches how
# checkpoints are written when saving below.
last_data_file = '../data/gcp_resources_chkp_230620.json'
last_data = pd.read_json(last_data_file, orient='split')
Preprocessing#
# Inspect duplicates before dropping them (a card can appear on two pages).
actual_data[actual_data.duplicated()][['format', 'name']]
actual_data.drop_duplicates(inplace=True)
print(f'{len(actual_data)} resources')
# Drop localized variants (names ending in 'Locales') — keep canonical entries.
actual_data = actual_data[~actual_data['name'].str.endswith('Locales')]
actual_data = actual_data.sort_values(by=['format','name', 'link'])
print(f'{len(actual_data)} resources')
Save actual data#
# Checkpoint today's snapshot as ../data/gcp_resources_chkp_YYMMDD.json,
# after keeping a throwaway '_tmp' backup of the previous checkpoint.
timestamp = datetime.datetime.now().strftime('%y%m%d')
shutil.copy(last_data_file, f'{last_data_file}_tmp')
new_file_name = f'{file_name}_chkp_{timestamp}.json'
actual_data.to_json(f'../data/{new_file_name}', indent=4, orient='split', index=False)
print(f'{new_file_name} created!')
Comparison#
# Tag every row with its source snapshot, then classify rows by whether
# the link (resource identity) and the format+name pair appear in both.
last_data['df'] = 'last'
actual_data['df'] = 'actual'
updates = pd.concat([last_data, actual_data])
updates['status'] = 'same'
# keep=False marks every member of a duplicate group, i.e. rows whose
# key occurs in both snapshots.
same_names = updates.duplicated(keep=False, subset=['format', 'name'])
same_links = updates.duplicated(keep=False, subset=['link'])
last_df = updates['df'] == 'last'
actual_df = updates['df'] == 'actual'
# Order matters: later .loc assignments refine earlier ones.
# Link only in the old snapshot -> removed; only in the new -> new.
updates.loc[last_df & ~same_links, 'status'] = 'removed'
updates.loc[actual_df & ~same_links, 'status'] = 'new'
# New link but a name that already existed: republished under a new URL.
updates.loc[actual_df & ~same_links & same_names, 'status'] = 'new_same_name'
# Same link, different name: the resource was renamed.
updates.loc[last_df & same_links & ~same_names, 'status'] = 'old_name'
updates.loc[actual_df & same_links & ~same_names, 'status'] = 'new_name'
updates = updates[updates['status'] != 'same'].sort_values(by=['format', 'status', 'name'])
cols = ['format', 'name', 'status', 'link']
# Render links as HTML anchors for the notebook/markdown report.
updates['link'] = updates['link'].apply(
    lambda x: f'<a href="{x}" target="_blank">link</a>')
updates[cols].style.format()
# Remove temporary weekly Trivia resources from the report.
updates = updates[~updates['name'].str.contains('Trivia')]
# Copy markdown to clipboard.
# Add to the Changelog.
pd.io.clipboards.to_clipboard(updates[cols].to_markdown(index=False), excel=False)
print(f'timestamp: {timestamp}')
Changelog#
230620#
format |
name |
status |
link |
---|---|---|---|
Course |
Developing Applications with Cloud Run on Google Cloud: Fundamentals |
new |
|
Course |
MySQL to Cloud Spanner |
new |
|
Lab |
Enhancing Network Security Approach on Google Cloud |
new |
|
Lab |
Managing Google Classroom |
new |
|
Lab |
Networking Fundamentals on Google Cloud: Challenge Lab |
new |
|
Lab |
The Basics of Google Cloud Compute: Challenge Lab |
new |
|
Lab |
Use APIs to Work with Cloud Storage: Challenge Lab |
new |
|
Lab |
Using the Google Cloud Speech API: Challenge Lab |
new |
|
Lab |
Build and Deploy Machine Learning Solutions with Vertex AI: Challenge Lab |
removed |
|
Lab |
Google Workspace Admin: Managing Google Meet |
removed |
|
Lab |
Scaling VM-Series to Secure Google Cloud Networks |
removed |
|
Lab |
Using Cloud Trace on Kubernetes Engine |
removed |
|
Quest |
Networking Fundamentals on Google Cloud |
new |
|
Quest |
The Basics of Google Cloud Compute |
new |
|
Quest |
Use APIs to Work with Cloud Storage |
new |
|
Quest |
Using the Google Cloud Speech API |
new |
230607#
format |
name |
status |
link |
---|---|---|---|
Course |
Generative AI Fundamentals |
new |
|
Lab |
API Gateway: Qwik Start |
new |
|
Lab |
Configure Your Workplace: Google Workspace for IT Admins: Challenge Lab |
new |
|
Lab |
Google Workspace Admin: Getting Started |
new |
|
Lab |
Google Workspace Admin: Managing Applications |
new |
|
Lab |
Google Workspace Admin: Managing Google Meet |
new |
|
Lab |
Google Workspace Admin: Provisioning |
new |
|
Lab |
Google Workspace Admin: Securing |
new |
|
Lab |
Google Workspace Admin: Super Admin Account Recovery |
new |
|
Lab |
Google Workspace for Education: Challenge Lab |
new |
|
Lab |
Google Workspace for Education: Getting Started |
new |
|
Lab |
Google Workspace for Education: Managing Services |
new |
|
Lab |
Setting Up Google Meet for Distance Learning |
new |
|
Lab |
Shared Drives: Getting Started |
new |
|
Lab |
Teaching with Google Classroom |
new |
|
Lab |
HTTPS Content-Based Load Balancer with Terraform |
removed |
|
Quest |
Getting Started with Apache Beam |
new_name |
|
Quest |
Get Started with Apache Beam |
old_name |
230604#
format |
name |
status |
link |
---|---|---|---|
Course |
Introduction to Generative AI Studio |
new |
|
Course |
Introduction to Responsible AI |
new |
|
Lab |
Analyze Images with the Cloud Vision API: Challenge Lab |
new |
|
Lab |
Offloading Financial Mainframe Data into BigQuery and Elastic Search |
new |
|
Lab |
Protect Sensitive Data with Data Loss Prevention: Challenge Lab |
new |
|
Lab |
Serverless Data Processing with Dataflow - Advanced Streaming Analytics Pipeline with Cloud Dataflow (Java) |
new |
|
Lab |
Serverless Data Processing with Dataflow - Testing with Apache Beam (Java) |
new |
|
Lab |
Serverless Data Processing with Dataflow - Writing an ETL Pipeline using Apache Beam and Cloud Dataflow (Python) |
new |
|
Lab |
Serverless Data Processing with Dataflow - Writing an ETL pipeline using Apache Beam and Cloud Dataflow (Java) |
new |
|
Lab |
Install and Use Cloud Tools for PowerShell |
removed |
|
Lab |
VM Migration: Introduction to StratoZone Migrate |
removed |
|
Quest |
Analyze Images with the Cloud Vision API |
new |
|
Quest |
Get Started with Apache Beam |
new |
|
Quest |
Protect Sensitive Data with Data Loss Prevention |
new |
|
Quest |
Measure Site Reliability using Cloud Operations Suite |
removed |
230528#
format |
name |
status |
link |
---|---|---|---|
Course |
Media Rendering with Google Cloud |
new |
|
Course |
Launching into Machine Learning |
new_same_name |
|
Lab |
Secure BigLake Data: Challenge Lab |
new |
|
Lab |
Tag and Discover BigLake Data: Challenge Lab |
new |
|
Lab |
API Gateway: Qwik Start |
removed |
|
Quest |
Getting Started with MongoDB Atlas on Google Cloud |
new |
|
Quest |
Secure BigLake Data |
new |
|
Quest |
Tag and Discover BigLake Data |
new |
230522#
format |
name |
status |
link |
---|---|---|---|
Lab |
Analyze Speech & Language with Google APIs: Challenge Lab |
new |
|
Lab |
Create a Secure Data Lake on Cloud Storage: Challenge Lab |
new |
|
Lab |
Monitoring in Google Cloud: Challenge Lab |
new |
|
Lab |
Building an Application with MongoDB Atlas and Natural Language API hosted on Cloud Run |
new_name |
|
Lab |
MongoDB Atlas with Natural Language API and Cloud Run |
old_name |
|
Quest |
Analyze Speech and Language with Google APIs |
new |
|
Quest |
Create a Secure Data Lake on Cloud Storage |
new |
|
Quest |
Monitoring in Google Cloud |
new |
230518#
format |
name |
status |
link |
---|---|---|---|
Course |
Discovery AI |
new_name |
|
Course |
Product Discovery |
old_name |
|
Course |
Migrating to Google Cloud |
removed |
|
Lab |
Get Started with Generative AI Studio |
new |
|
Lab |
Hosting a Web App on Google Cloud Using Compute Engine - Azure |
new |
|
Lab |
Enhancing Network Security Approach on Google Cloud |
removed |
|
Lab |
Measure Site Reliability using Cloud Operations Suite: Challenge Lab |
removed |
230517#
format |
name |
status |
link |
---|---|---|---|
Course |
Create Image Captioning Models |
new |
|
Course |
Encoder-Decoder Architecture |
new |
|
Course |
Introduction to Image Generation |
new |
|
Course |
Preparing for Your Google Workspace Administrator Journey |
new |
|
Lab |
Qwiklabs Trivia May Week 3 |
new |
|
Lab |
SAP Landing Zone: Add and Configure Storage to SAP VMs |
new |
|
Lab |
SAP Landing Zone: Plan and Deploy SAP Virtual Machines |
new |
|
Lab |
SAP Landing Zone: Plan and Deploy the SAP Network |
new |
|
Lab |
Qwiklabs Trivia May Week 2 |
removed |
|
Quest |
Build an SAP Landing Zone on Google Cloud |
new |
|
Quest |
Google Cloud’s Operations Suite on GKE |
removed |
230515#
format |
name |
status |
link |
---|---|---|---|
Lab |
Generative AI with Vertex AI: Getting Started |
new |
|
Lab |
Generative AI with Vertex AI: Prompt Design |
new |
|
Quest |
Generative AI Explorer - Vertex AI |
new |
230513#
format |
name |
status |
link |
---|---|---|---|
Lab |
App Engine: 3 Ways: Challenge Lab |
new |
|
Lab |
Get Started with Eventarc: Challenge Lab |
new |
|
Lab |
Get Started with Pub/Sub: Challenge Lab |
new |
|
Quest |
App Engine: 3 Ways |
new |
|
Quest |
Get Started with Eventarc |
new |
|
Quest |
Get Started with Pub/Sub |
new |
230324 - 230510#
format |
name |
status |
link |
---|---|---|---|
Course |
Attention Mechanism |
new |
|
Course |
Building Applications with Eventarc on Google Cloud |
new |
|
Course |
Design Foundations for Streaming with Google Cloud |
new |
|
Course |
Developing Containerized Applications on Google Cloud |
new |
|
Course |
Introduction to Generative AI |
new |
|
Course |
Introduction to Large Language Models |
new |
|
Course |
Serving Multimedia Content with Google Cloud |
new |
|
Course |
Transformer Models and BERT Model |
new |
|
Course |
Deploy and Monitor in Google Cloud for AWS Professionals |
new_name |
|
Course |
Deploy and Monitor in Google Cloud for Azure Professionals |
new_name |
|
Course |
Google Cloud Compute and Scalability for AWS Professionals |
new_name |
|
Course |
Google Cloud Compute and Scalability for Azure Professionals |
new_name |
|
Course |
Google Cloud IAM and Networking for AWS Professionals |
new_name |
|
Course |
Google Cloud IAM and Networking for Azure Professionals |
new_name |
|
Course |
Google Cloud Storage and Containers for AWS Professionals |
new_name |
|
Course |
Google Cloud Storage and Containers for Azure Professionals |
new_name |
|
Course |
Machine Learning Operations (MLOps): Getting Started |
new_name |
|
Course |
MLOps (Machine Learning Operations) Fundamentals |
old_name |