babyweight/train_deploy.ipynb

Based on:

import os

# Read the active Google Cloud project id from the environment.
# The value is None when GOOGLE_CLOUD_PROJECT is not set; it is echoed so the
# notebook shows which project the following cells will use.
PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
print(PROJECT_ID)
cloudskillsboost-377709

Create Dataset

from google.cloud import bigquery

# Shared label: it names the BigQuery dataset here and is also part of the
# Cloud Storage bucket name built further down.
TAG = 'babyweight'

# Client() is called without arguments, so the project and credentials are
# whatever the runtime environment supplies.
bigquery_client = bigquery.Client()

# exists_ok=True keeps this cell re-runnable: an already existing dataset is
# not reported as an error.
dataset = bigquery_client.create_dataset(dataset=TAG, exists_ok=True)
%load_ext google.cloud.bigquery

Create Bucket

from google.cloud import storage

# Bucket location and name. The name joins the project id and the shared tag
# through f-string formatting, so an unset PROJECT_ID renders as "None".
REGION = 'us-central1'
BUCKET_NAME = f'{PROJECT_ID}-{TAG}'

storage_client = storage.Client()

# Look the bucket up first and create it only when it is missing, so the cell
# can be executed repeatedly without failing on an existing bucket.
bucket = storage_client.bucket(BUCKET_NAME)
if not bucket.exists():
    storage_client.create_bucket(BUCKET_NAME, location=REGION)
%%bigquery
-- (Re)build babyweight.babyweight_data from the public natality sample.
-- Only rows after the year 2000 with a positive weight, mother age,
-- plurality and gestation length are kept.
CREATE OR REPLACE TABLE babyweight.babyweight_data AS
SELECT
    weight_pounds,
    CAST(is_male AS STRING) AS is_male,
    mother_age,
    -- Plurality counts 1-5 become text labels; any other value yields NULL
    -- because the CASE has no ELSE branch.
    CASE plurality
        WHEN 1 THEN "Single(1)"
        WHEN 2 THEN "Twins(2)"
        WHEN 3 THEN "Triplets(3)"
        WHEN 4 THEN "Quadruplets(4)"
        WHEN 5 THEN "Quintuplets(5)"
    END AS plurality,
    gestation_weeks,
    -- Fingerprint of the year and month rendered as strings and concatenated,
    -- so every row of the same calendar month shares one hashmonth value.
    FARM_FINGERPRINT(
        CONCAT(CAST(year AS STRING), CAST(month AS STRING))
    ) AS hashmonth
FROM publicdata.samples.natality
WHERE year > 2000
    AND weight_pounds > 0
    AND mother_age > 0
    AND plurality > 0
    AND gestation_weeks > 0