Install Requirements

In [1]:
%pip install mlrun scikit-learn pandas numpy
Requirement already satisfied: mlrun in /opt/conda/lib/python3.8/site-packages (0.6.4rc7)
Requirement already satisfied: scikit-learn in /opt/conda/lib/python3.8/site-packages (0.23.2)
Requirement already satisfied: pandas in /opt/conda/lib/python3.8/site-packages (1.2.4)
Requirement already satisfied: numpy in /opt/conda/lib/python3.8/site-packages (1.19.5)
Requirement already satisfied: cryptography~=3.3.2 in /opt/conda/lib/python3.8/site-packages (from mlrun) (3.3.2)
Requirement already satisfied: pydantic~=1.5 in /opt/conda/lib/python3.8/site-packages (from mlrun) (1.8.2)
Requirement already satisfied: mergedeep~=1.3 in /opt/conda/lib/python3.8/site-packages (from mlrun) (1.3.4)
Requirement already satisfied: tabulate<=0.8.3,>=0.8.0 in /opt/conda/lib/python3.8/site-packages (from mlrun) (0.8.3)
Requirement already satisfied: fsspec~=0.9.0 in /opt/conda/lib/python3.8/site-packages (from mlrun) (0.9.0)
Requirement already satisfied: v3io-frames~=0.8.5 in /opt/conda/lib/python3.8/site-packages (from mlrun) (0.8.14)
Requirement already satisfied: kubernetes~=11.0 in /opt/conda/lib/python3.8/site-packages (from mlrun) (11.0.0)
Requirement already satisfied: humanfriendly~=8.2 in /opt/conda/lib/python3.8/site-packages (from mlrun) (8.2)
Requirement already satisfied: v3iofs~=0.1.5 in /opt/conda/lib/python3.8/site-packages (from mlrun) (0.1.6)
Requirement already satisfied: fastapi~=0.62.0 in /opt/conda/lib/python3.8/site-packages (from mlrun) (0.62.0)
Requirement already satisfied: requests~=2.22 in /opt/conda/lib/python3.8/site-packages (from mlrun) (2.25.1)
Requirement already satisfied: GitPython~=3.0 in /opt/conda/lib/python3.8/site-packages (from mlrun) (3.1.17)
Requirement already satisfied: aiohttp~=3.6 in /opt/conda/lib/python3.8/site-packages (from mlrun) (3.7.4.post0)
Requirement already satisfied: pyyaml~=5.1 in /opt/conda/lib/python3.8/site-packages (from mlrun) (5.4.1)
Requirement already satisfied: nest-asyncio~=1.0 in /opt/conda/lib/python3.8/site-packages (from mlrun) (1.5.1)
Requirement already satisfied: ipython<7.17,>=5.5 in /opt/conda/lib/python3.8/site-packages (from mlrun) (7.16.1)
Requirement already satisfied: click~=7.0 in /opt/conda/lib/python3.8/site-packages (from mlrun) (7.1.2)
Requirement already satisfied: dask~=2.12 in /opt/conda/lib/python3.8/site-packages (from mlrun) (2.30.0)
Requirement already satisfied: storey~=0.5.0; python_version >= "3.7" in /opt/conda/lib/python3.8/site-packages (from mlrun) (0.5.0)
Requirement already satisfied: distributed<3,>=2.23 in /opt/conda/lib/python3.8/site-packages (from mlrun) (2.30.1)
Requirement already satisfied: alembic<1.6.0,~=1.4 in /opt/conda/lib/python3.8/site-packages (from mlrun) (1.5.8)
Requirement already satisfied: nuclio-jupyter==0.8.13 in /opt/conda/lib/python3.8/site-packages (from mlrun) (0.8.13)
Requirement already satisfied: urllib3<1.27,>=1.25.4 in /opt/conda/lib/python3.8/site-packages (from mlrun) (1.26.4)
Requirement already satisfied: sqlalchemy~=1.3 in /opt/conda/lib/python3.8/site-packages (from mlrun) (1.4.13)
Requirement already satisfied: v3io~=0.5.0 in /opt/conda/lib/python3.8/site-packages (from mlrun) (0.5.7)
Requirement already satisfied: semver~=2.13 in /opt/conda/lib/python3.8/site-packages (from mlrun) (2.13.0)
Requirement already satisfied: chardet<4.0,>=3.0.2 in /opt/conda/lib/python3.8/site-packages (from mlrun) (3.0.4)
Requirement already satisfied: kfp~=1.0.1 in /opt/conda/lib/python3.8/site-packages (from mlrun) (1.0.4)
Requirement already satisfied: pyarrow~=1.0 in /opt/conda/lib/python3.8/site-packages (from mlrun) (1.0.1)
Requirement already satisfied: orjson<3.4,>=3 in /opt/conda/lib/python3.8/site-packages (from mlrun) (3.3.1)
Requirement already satisfied: threadpoolctl>=2.0.0 in /opt/conda/lib/python3.8/site-packages (from scikit-learn) (2.1.0)
Requirement already satisfied: scipy>=0.19.1 in /opt/conda/lib/python3.8/site-packages (from scikit-learn) (1.6.3)
Requirement already satisfied: joblib>=0.11 in /opt/conda/lib/python3.8/site-packages (from scikit-learn) (1.0.1)
Requirement already satisfied: python-dateutil>=2.7.3 in /opt/conda/lib/python3.8/site-packages (from pandas) (2.8.1)
Requirement already satisfied: pytz>=2017.3 in /opt/conda/lib/python3.8/site-packages (from pandas) (2021.1)
Requirement already satisfied: six>=1.4.1 in /opt/conda/lib/python3.8/site-packages (from cryptography~=3.3.2->mlrun) (1.16.0)
Requirement already satisfied: cffi>=1.12 in /opt/conda/lib/python3.8/site-packages (from cryptography~=3.3.2->mlrun) (1.14.5)
Requirement already satisfied: typing-extensions>=3.7.4.3 in /opt/conda/lib/python3.8/site-packages (from pydantic~=1.5->mlrun) (3.7.4.3)
Requirement already satisfied: grpcio-tools==1.30.0 in /opt/conda/lib/python3.8/site-packages (from v3io-frames~=0.8.5->mlrun) (1.30.0)
Requirement already satisfied: grpcio==1.30.0 in /opt/conda/lib/python3.8/site-packages (from v3io-frames~=0.8.5->mlrun) (1.30.0)
Requirement already satisfied: googleapis-common-protos>=1.5.3 in /opt/conda/lib/python3.8/site-packages (from v3io-frames~=0.8.5->mlrun) (1.53.0)
Requirement already satisfied: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in /opt/conda/lib/python3.8/site-packages (from kubernetes~=11.0->mlrun) (1.0.1)
Requirement already satisfied: requests-oauthlib in /opt/conda/lib/python3.8/site-packages (from kubernetes~=11.0->mlrun) (1.3.0)
Requirement already satisfied: setuptools>=21.0.0 in /opt/conda/lib/python3.8/site-packages (from kubernetes~=11.0->mlrun) (49.6.0.post20210108)
Requirement already satisfied: certifi>=14.05.14 in /opt/conda/lib/python3.8/site-packages (from kubernetes~=11.0->mlrun) (2020.12.5)
Requirement already satisfied: google-auth>=1.0.1 in /opt/conda/lib/python3.8/site-packages (from kubernetes~=11.0->mlrun) (1.30.0)
Requirement already satisfied: starlette==0.13.6 in /opt/conda/lib/python3.8/site-packages (from fastapi~=0.62.0->mlrun) (0.13.6)
Requirement already satisfied: idna<3,>=2.5 in /opt/conda/lib/python3.8/site-packages (from requests~=2.22->mlrun) (2.10)
Requirement already satisfied: gitdb<5,>=4.0.1 in /opt/conda/lib/python3.8/site-packages (from GitPython~=3.0->mlrun) (4.0.7)
Requirement already satisfied: yarl<2.0,>=1.0 in /opt/conda/lib/python3.8/site-packages (from aiohttp~=3.6->mlrun) (1.6.3)
Requirement already satisfied: async-timeout<4.0,>=3.0 in /opt/conda/lib/python3.8/site-packages (from aiohttp~=3.6->mlrun) (3.0.1)
Requirement already satisfied: multidict<7.0,>=4.5 in /opt/conda/lib/python3.8/site-packages (from aiohttp~=3.6->mlrun) (5.1.0)
Requirement already satisfied: attrs>=17.3.0 in /opt/conda/lib/python3.8/site-packages (from aiohttp~=3.6->mlrun) (20.3.0)
Requirement already satisfied: jedi>=0.10 in /opt/conda/lib/python3.8/site-packages (from ipython<7.17,>=5.5->mlrun) (0.18.0)
Requirement already satisfied: pexpect; sys_platform != "win32" in /opt/conda/lib/python3.8/site-packages (from ipython<7.17,>=5.5->mlrun) (4.8.0)
Requirement already satisfied: traitlets>=4.2 in /opt/conda/lib/python3.8/site-packages (from ipython<7.17,>=5.5->mlrun) (5.0.5)
Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /opt/conda/lib/python3.8/site-packages (from ipython<7.17,>=5.5->mlrun) (3.0.18)
Requirement already satisfied: pickleshare in /opt/conda/lib/python3.8/site-packages (from ipython<7.17,>=5.5->mlrun) (0.7.5)
Requirement already satisfied: decorator in /opt/conda/lib/python3.8/site-packages (from ipython<7.17,>=5.5->mlrun) (5.0.7)
Requirement already satisfied: pygments in /opt/conda/lib/python3.8/site-packages (from ipython<7.17,>=5.5->mlrun) (2.9.0)
Requirement already satisfied: backcall in /opt/conda/lib/python3.8/site-packages (from ipython<7.17,>=5.5->mlrun) (0.2.0)
Requirement already satisfied: zict>=0.1.3 in /opt/conda/lib/python3.8/site-packages (from distributed<3,>=2.23->mlrun) (2.0.0)
Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /opt/conda/lib/python3.8/site-packages (from distributed<3,>=2.23->mlrun) (2.3.0)
Requirement already satisfied: toolz>=0.8.2 in /opt/conda/lib/python3.8/site-packages (from distributed<3,>=2.23->mlrun) (0.11.1)
Requirement already satisfied: cloudpickle>=1.5.0 in /opt/conda/lib/python3.8/site-packages (from distributed<3,>=2.23->mlrun) (1.6.0)
Requirement already satisfied: tblib>=1.6.0 in /opt/conda/lib/python3.8/site-packages (from distributed<3,>=2.23->mlrun) (1.7.0)
Requirement already satisfied: tornado>=6.0.3; python_version >= "3.8" in /opt/conda/lib/python3.8/site-packages (from distributed<3,>=2.23->mlrun) (6.1)
Requirement already satisfied: msgpack>=0.6.0 in /opt/conda/lib/python3.8/site-packages (from distributed<3,>=2.23->mlrun) (1.0.2)
Requirement already satisfied: psutil>=5.0 in /opt/conda/lib/python3.8/site-packages (from distributed<3,>=2.23->mlrun) (5.8.0)
Requirement already satisfied: Mako in /opt/conda/lib/python3.8/site-packages (from alembic<1.6.0,~=1.4->mlrun) (1.1.4)
Requirement already satisfied: python-editor>=0.3 in /opt/conda/lib/python3.8/site-packages (from alembic<1.6.0,~=1.4->mlrun) (1.0.4)
Requirement already satisfied: boto3>=1.9 in /opt/conda/lib/python3.8/site-packages (from nuclio-jupyter==0.8.13->mlrun) (1.17.49)
Requirement already satisfied: jupyter-server~=1.0 in /opt/conda/lib/python3.8/site-packages (from nuclio-jupyter==0.8.13->mlrun) (1.6.4)
Requirement already satisfied: notebook>=5.2.0 in /opt/conda/lib/python3.8/site-packages (from nuclio-jupyter==0.8.13->mlrun) (6.3.0)
Requirement already satisfied: nbconvert>=5.4 in /opt/conda/lib/python3.8/site-packages (from nuclio-jupyter==0.8.13->mlrun) (6.0.7)
Requirement already satisfied: greenlet!=0.4.17; python_version >= "3" in /opt/conda/lib/python3.8/site-packages (from sqlalchemy~=1.3->mlrun) (1.0.0)
Requirement already satisfied: future>=0.18.2 in /opt/conda/lib/python3.8/site-packages (from v3io~=0.5.0->mlrun) (0.18.2)
Requirement already satisfied: ujson>=3.0.0 in /opt/conda/lib/python3.8/site-packages (from v3io~=0.5.0->mlrun) (4.0.2)
Requirement already satisfied: strip-hints in /opt/conda/lib/python3.8/site-packages (from kfp~=1.0.1->mlrun) (0.1.9)
Requirement already satisfied: google-cloud-storage>=1.13.0 in /opt/conda/lib/python3.8/site-packages (from kfp~=1.0.1->mlrun) (1.38.0)
Requirement already satisfied: requests-toolbelt>=0.8.0 in /opt/conda/lib/python3.8/site-packages (from kfp~=1.0.1->mlrun) (0.9.1)
Requirement already satisfied: jsonschema>=3.0.1 in /opt/conda/lib/python3.8/site-packages (from kfp~=1.0.1->mlrun) (3.2.0)
Requirement already satisfied: kfp-server-api<2.0.0,>=0.2.5 in /opt/conda/lib/python3.8/site-packages (from kfp~=1.0.1->mlrun) (1.6.0)
Requirement already satisfied: Deprecated in /opt/conda/lib/python3.8/site-packages (from kfp~=1.0.1->mlrun) (1.2.12)
Requirement already satisfied: pycparser in /opt/conda/lib/python3.8/site-packages (from cffi>=1.12->cryptography~=3.3.2->mlrun) (2.20)
Requirement already satisfied: protobuf>=3.5.0.post1 in /opt/conda/lib/python3.8/site-packages (from grpcio-tools==1.30.0->v3io-frames~=0.8.5->mlrun) (3.15.8)
Requirement already satisfied: oauthlib>=3.0.0 in /opt/conda/lib/python3.8/site-packages (from requests-oauthlib->kubernetes~=11.0->mlrun) (3.0.1)
Requirement already satisfied: cachetools<5.0,>=2.0.0 in /opt/conda/lib/python3.8/site-packages (from google-auth>=1.0.1->kubernetes~=11.0->mlrun) (4.2.2)
Requirement already satisfied: pyasn1-modules>=0.2.1 in /opt/conda/lib/python3.8/site-packages (from google-auth>=1.0.1->kubernetes~=11.0->mlrun) (0.2.8)
Requirement already satisfied: rsa<5,>=3.1.4; python_version >= "3.6" in /opt/conda/lib/python3.8/site-packages (from google-auth>=1.0.1->kubernetes~=11.0->mlrun) (4.7.2)
Requirement already satisfied: smmap<5,>=3.0.1 in /opt/conda/lib/python3.8/site-packages (from gitdb<5,>=4.0.1->GitPython~=3.0->mlrun) (4.0.0)
Requirement already satisfied: parso<0.9.0,>=0.8.0 in /opt/conda/lib/python3.8/site-packages (from jedi>=0.10->ipython<7.17,>=5.5->mlrun) (0.8.2)
Requirement already satisfied: ptyprocess>=0.5 in /opt/conda/lib/python3.8/site-packages (from pexpect; sys_platform != "win32"->ipython<7.17,>=5.5->mlrun) (0.7.0)
Requirement already satisfied: ipython-genutils in /opt/conda/lib/python3.8/site-packages (from traitlets>=4.2->ipython<7.17,>=5.5->mlrun) (0.2.0)
Requirement already satisfied: wcwidth in /opt/conda/lib/python3.8/site-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->ipython<7.17,>=5.5->mlrun) (0.2.5)
Requirement already satisfied: heapdict in /opt/conda/lib/python3.8/site-packages (from zict>=0.1.3->distributed<3,>=2.23->mlrun) (1.0.1)
Requirement already satisfied: MarkupSafe>=0.9.2 in /opt/conda/lib/python3.8/site-packages (from Mako->alembic<1.6.0,~=1.4->mlrun) (1.1.1)
Requirement already satisfied: botocore<1.21.0,>=1.20.49 in /opt/conda/lib/python3.8/site-packages (from boto3>=1.9->nuclio-jupyter==0.8.13->mlrun) (1.20.49)
Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in /opt/conda/lib/python3.8/site-packages (from boto3>=1.9->nuclio-jupyter==0.8.13->mlrun) (0.10.0)
Requirement already satisfied: s3transfer<0.4.0,>=0.3.0 in /opt/conda/lib/python3.8/site-packages (from boto3>=1.9->nuclio-jupyter==0.8.13->mlrun) (0.3.7)
Requirement already satisfied: terminado>=0.8.3 in /opt/conda/lib/python3.8/site-packages (from jupyter-server~=1.0->nuclio-jupyter==0.8.13->mlrun) (0.9.4)
Requirement already satisfied: jinja2 in /opt/conda/lib/python3.8/site-packages (from jupyter-server~=1.0->nuclio-jupyter==0.8.13->mlrun) (2.11.3)
Requirement already satisfied: Send2Trash in /opt/conda/lib/python3.8/site-packages (from jupyter-server~=1.0->nuclio-jupyter==0.8.13->mlrun) (1.5.0)
Requirement already satisfied: jupyter-client>=6.1.1 in /opt/conda/lib/python3.8/site-packages (from jupyter-server~=1.0->nuclio-jupyter==0.8.13->mlrun) (6.1.12)
Requirement already satisfied: anyio<3,>=2.0.2 in /opt/conda/lib/python3.8/site-packages (from jupyter-server~=1.0->nuclio-jupyter==0.8.13->mlrun) (2.2.0)
Requirement already satisfied: jupyter-core>=4.4.0 in /opt/conda/lib/python3.8/site-packages (from jupyter-server~=1.0->nuclio-jupyter==0.8.13->mlrun) (4.7.1)
Requirement already satisfied: nbformat in /opt/conda/lib/python3.8/site-packages (from jupyter-server~=1.0->nuclio-jupyter==0.8.13->mlrun) (5.1.3)
Requirement already satisfied: prometheus-client in /opt/conda/lib/python3.8/site-packages (from jupyter-server~=1.0->nuclio-jupyter==0.8.13->mlrun) (0.10.1)
Requirement already satisfied: jupyter-packaging~=0.9 in /opt/conda/lib/python3.8/site-packages (from jupyter-server~=1.0->nuclio-jupyter==0.8.13->mlrun) (0.10.1)
Requirement already satisfied: pyzmq>=17 in /opt/conda/lib/python3.8/site-packages (from jupyter-server~=1.0->nuclio-jupyter==0.8.13->mlrun) (22.0.3)
Requirement already satisfied: argon2-cffi in /opt/conda/lib/python3.8/site-packages (from jupyter-server~=1.0->nuclio-jupyter==0.8.13->mlrun) (20.1.0)
Requirement already satisfied: ipykernel in /opt/conda/lib/python3.8/site-packages (from notebook>=5.2.0->nuclio-jupyter==0.8.13->mlrun) (5.5.4)
Requirement already satisfied: entrypoints>=0.2.2 in /opt/conda/lib/python3.8/site-packages (from nbconvert>=5.4->nuclio-jupyter==0.8.13->mlrun) (0.3)
Requirement already satisfied: bleach in /opt/conda/lib/python3.8/site-packages (from nbconvert>=5.4->nuclio-jupyter==0.8.13->mlrun) (3.3.0)
Requirement already satisfied: defusedxml in /opt/conda/lib/python3.8/site-packages (from nbconvert>=5.4->nuclio-jupyter==0.8.13->mlrun) (0.7.1)
Requirement already satisfied: jupyterlab-pygments in /opt/conda/lib/python3.8/site-packages (from nbconvert>=5.4->nuclio-jupyter==0.8.13->mlrun) (0.1.2)
Requirement already satisfied: nbclient<0.6.0,>=0.5.0 in /opt/conda/lib/python3.8/site-packages (from nbconvert>=5.4->nuclio-jupyter==0.8.13->mlrun) (0.5.3)
Requirement already satisfied: testpath in /opt/conda/lib/python3.8/site-packages (from nbconvert>=5.4->nuclio-jupyter==0.8.13->mlrun) (0.4.4)
Requirement already satisfied: pandocfilters>=1.4.1 in /opt/conda/lib/python3.8/site-packages (from nbconvert>=5.4->nuclio-jupyter==0.8.13->mlrun) (1.4.2)
Requirement already satisfied: mistune<2,>=0.8.1 in /opt/conda/lib/python3.8/site-packages (from nbconvert>=5.4->nuclio-jupyter==0.8.13->mlrun) (0.8.4)
Requirement already satisfied: wheel in /opt/conda/lib/python3.8/site-packages (from strip-hints->kfp~=1.0.1->mlrun) (0.36.2)
Requirement already satisfied: google-cloud-core<2.0dev,>=1.4.1 in /opt/conda/lib/python3.8/site-packages (from google-cloud-storage>=1.13.0->kfp~=1.0.1->mlrun) (1.6.0)
Requirement already satisfied: google-resumable-media<2.0dev,>=1.2.0 in /opt/conda/lib/python3.8/site-packages (from google-cloud-storage>=1.13.0->kfp~=1.0.1->mlrun) (1.3.0)
Requirement already satisfied: pyrsistent>=0.14.0 in /opt/conda/lib/python3.8/site-packages (from jsonschema>=3.0.1->kfp~=1.0.1->mlrun) (0.17.3)
Requirement already satisfied: wrapt<2,>=1.10 in /opt/conda/lib/python3.8/site-packages (from Deprecated->kfp~=1.0.1->mlrun) (1.12.1)
Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /opt/conda/lib/python3.8/site-packages (from pyasn1-modules>=0.2.1->google-auth>=1.0.1->kubernetes~=11.0->mlrun) (0.4.8)
Requirement already satisfied: sniffio>=1.1 in /opt/conda/lib/python3.8/site-packages (from anyio<3,>=2.0.2->jupyter-server~=1.0->nuclio-jupyter==0.8.13->mlrun) (1.2.0)
Requirement already satisfied: deprecation in /opt/conda/lib/python3.8/site-packages (from jupyter-packaging~=0.9->jupyter-server~=1.0->nuclio-jupyter==0.8.13->mlrun) (2.1.0)
Requirement already satisfied: packaging in /opt/conda/lib/python3.8/site-packages (from jupyter-packaging~=0.9->jupyter-server~=1.0->nuclio-jupyter==0.8.13->mlrun) (20.9)
Requirement already satisfied: tomlkit in /opt/conda/lib/python3.8/site-packages (from jupyter-packaging~=0.9->jupyter-server~=1.0->nuclio-jupyter==0.8.13->mlrun) (0.7.0)
Requirement already satisfied: webencodings in /opt/conda/lib/python3.8/site-packages (from bleach->nbconvert>=5.4->nuclio-jupyter==0.8.13->mlrun) (0.5.1)
Requirement already satisfied: async-generator in /opt/conda/lib/python3.8/site-packages (from nbclient<0.6.0,>=0.5.0->nbconvert>=5.4->nuclio-jupyter==0.8.13->mlrun) (1.10)
Requirement already satisfied: google-api-core<2.0.0dev,>=1.21.0 in /opt/conda/lib/python3.8/site-packages (from google-cloud-core<2.0dev,>=1.4.1->google-cloud-storage>=1.13.0->kfp~=1.0.1->mlrun) (1.28.0)
Requirement already satisfied: google-crc32c<2.0dev,>=1.0; python_version >= "3.5" in /opt/conda/lib/python3.8/site-packages (from google-resumable-media<2.0dev,>=1.2.0->google-cloud-storage>=1.13.0->kfp~=1.0.1->mlrun) (1.1.2)
Requirement already satisfied: pyparsing>=2.0.2 in /opt/conda/lib/python3.8/site-packages (from packaging->jupyter-packaging~=0.9->jupyter-server~=1.0->nuclio-jupyter==0.8.13->mlrun) (2.4.7)
WARNING: You are using pip version 20.2.4; however, version 21.1.3 is available.
You should consider upgrading via the '/opt/conda/bin/python -m pip install --upgrade pip' command.
Note: you may need to restart the kernel to use updated packages.

Create an MLRun project and configuration

In [2]:
from os import path
import mlrun

# Base name of the MLRun project. With user_project=True below, the recorded
# run resolved it to 'pmt-app-jovyan' (base name plus the notebook user).
project_name_base = 'pmt-app'

# Configure the MLRun client for this project; returns the resolved project
# name and the default artifact path.
project_name, artifact_path = mlrun.set_environment(project=project_name_base, user_project=True)

print(f'Project name: {project_name}')
print(f'Artifact path: {artifact_path}')
Project name: pmt-app-jovyan
Artifact path: /home/jovyan/data
In [4]:
from os import path
import numpy as np 
import pandas as pd
import datetime as dt
from mlrun.execution import MLClientCtx
from mlrun.datastore import DataItem
from pickle import dumps
from sklearn.ensemble import RandomForestClassifier
In [5]:
def clean_yes_no_column(serie, train=True, train_mean=None):
    """Convert a mixed "yes"/"no"/numeric column into numeric values.

    "no" becomes 0.0, every other non-"yes" entry is passed through ``float``,
    and each "yes" is replaced by a fill value.

    Args:
        serie: pandas Series holding "yes", "no" and numeric-like entries.
        train: When true, the fill value is the mean of the non-"yes" entries
            of this very column; otherwise ``train_mean`` is used.
        train_mean: Fill value for "yes" entries when ``train`` is false.

    Returns:
        A new Series with every "yes" replaced by the fill value.
    """
    def _to_number(value):
        # "no" maps to zero; "yes" stays as a placeholder until the fill
        # value is known; anything else must be convertible to float.
        if value == "no":
            return 0.0
        return value if value == "yes" else float(value)

    def _fill_yes(value):
        return fill_value if value == "yes" else value

    numeric_or_yes = serie.apply(_to_number)
    # The mean is only computed in training mode, over the non-"yes" entries.
    fill_value = (
        numeric_or_yes[numeric_or_yes != "yes"].mean() if train else train_mean
    )
    return numeric_or_yes.apply(_fill_yes)
In [6]:
def fetch_data(context : MLClientCtx, pmt_records_path: DataItem):
    """Load the raw PMT records and log them as the 'pmt-app-dataset' artifact.

    Args:
        context: MLRun execution context; supplies the logger, the artifact
            path and the dataset-logging call.
        pmt_records_path: Data item pointing at the records; read via ``as_df()``.
    """
    records_df = pmt_records_path.as_df()

    # Datasets are written to a 'data' folder under the run's artifact path.
    dataset_dir = path.join(context.artifact_path, 'data')
    message = 'Saving datasets to {} ...'.format(dataset_dir)
    context.logger.info(message)

    # Log the frame as a CSV dataset artifact, without the index column.
    context.log_dataset(
        'pmt-app-dataset',
        df=records_df,
        format='csv',
        index=False,
        artifact_path=dataset_dir,
    )
      
In [7]:
def transform_dataset(context : MLClientCtx, pmt_records_path: DataItem):
    """Impute and clean the raw PMT records, then log the transformed dataset.

    Steps: fill missing values, rebuild ``SQBmeaned`` from the imputed
    ``meaneduc``, convert the "yes"/"no" columns to numbers, and log the result
    as the 'pmt-app-dataset-transformed' CSV artifact.

    Args:
        context: MLRun execution context; supplies the logger, the artifact
            path and the dataset-logging call.
        pmt_records_path: Data item holding the raw records; read via ``as_df()``.
    """
    context.logger.info('Begin datasets transform')

    train_data = pmt_records_path.as_df()

    # Per-column replacements for missing values (medians come from this frame).
    fill_dict = {
        "v2a1": train_data["v2a1"].median(),          # Monthly rent payment
        "v18q1": 0,                                   # Number of tablets household owns
        "rez_esc": train_data["rez_esc"].median(),    # Years behind in school
        "meaneduc": train_data["meaneduc"].median(),  # Average years of education for adults (18+)
    }
    train_data = train_data.fillna(fill_dict)

    # SQBmeaned is the *square* of meaneduc in the dataset's data dictionary
    # (the original code took the square root, which skews training features
    # relative to the values sent at inference time). Bracket assignment is
    # used so the column is written even if it is missing from the input.
    train_data["SQBmeaned"] = train_data["meaneduc"] ** 2

    # These columns mix "yes"/"no" with numbers; convert them to numeric.
    for column in ("dependency", "edjefe", "edjefa"):
        train_data[column] = clean_yes_no_column(train_data[column])

    # Save dataset to artifact
    # NOTE(review): unlike fetch_data, this call does not pass index=False, so
    # the DataFrame index may be written as an extra CSV column that
    # train_model would then treat as a feature. Confirm before changing: it
    # alters the feature count that serving inputs must match.
    target_path = path.join(context.artifact_path, 'data')
    context.log_dataset('pmt-app-dataset-transformed', df=train_data, artifact_path=target_path, format='csv')

    context.logger.info('End dataset transform')
In [8]:
def train_model(context: MLClientCtx, input_ds: DataItem, random_state: int = 42):
    """Train a random-forest classifier on the transformed records and log it.

    Args:
        context: MLRun execution context; supplies the logger and stores the
            model artifact.
        input_ds: Data item holding the transformed dataset; it must contain a
            ``Target`` column, which is used as the label.
        random_state: Seed forwarded to ``RandomForestClassifier`` so repeated
            runs on the same data yield the same model. Pass ``None`` to get
            the previous unseeded behaviour.
    """
    context.logger.info('Begin training')
    train_data = input_ds.as_df()

    # The label and the identifier columns must not be used as features.
    excluded_cols = {"Target", "Id", "idhogar"}
    feature_cols = [col for col in train_data.columns if col not in excluded_cols]
    X = train_data[feature_cols]
    y = train_data["Target"]

    model = RandomForestClassifier(random_state=random_state)
    model.fit(X, y)

    # Store the pickled estimator as the 'ModelPMT' model artifact.
    context.log_model('ModelPMT',
                      body=dumps(model),
                      artifact_path=context.artifact_subpath("models"),
                      model_file="ModelPMT.pkl")

    context.logger.info('End training')
In [9]:
# mlrun: end-code

Set Input Paths

In [10]:
# Remote CSV with the training records; passed to fetch_data as the
# `pmt_records_path` input in the run cells below.
pmt_records_csv_path = 'https://pmt-data.herokuapp.com/train.csv'

Convert Code to a Function

In [11]:
# Wrap this notebook's code as an MLRun function of kind 'job' on top of the
# mlrun/mlrun image. The listed requirements are pip-installed when the image
# is built (see the build log of the deploy cell).
model_pmt_func = mlrun.code_to_function(name='model_pmt',
                                   kind='job',
                                   image='mlrun/mlrun',
                                   requirements=['scikit-learn', 'numpy','pandas'])

Run fetch_data Locally

We can test our code locally by calling the function with the `local` parameter set to `True`.

In [12]:
# Smoke-test the fetch_data handler locally (local=True) before building and
# running on the cluster; the CSV URL is bound to the handler's
# `pmt_records_path` input.
fetch_data_run = model_pmt_func.run(handler='fetch_data',
                               inputs={'pmt_records_path': pmt_records_csv_path},
                               local=True)
> 2021-06-27 12:15:53,354 [info] starting run model-pmt-fetch_data uid=c9c1eb7518124cb2aff6512e835ecced DB=http://mlrun-api:8080
> 2021-06-27 12:16:20,025 [info] Saving datasets to /home/jovyan/data/data ...
project uid iter start state name labels inputs parameters results artifacts
pmt-app-jovyan 0 Jun 27 12:15:53 completed model-pmt-fetch_data
kind=
owner=jovyan
host=mlrun-kit-jupyter-6879c4d97f-ksbvf
pmt_records_path
pmt-app-dataset
to track results use .show() or .logs() or in CLI: 
!mlrun get run c9c1eb7518124cb2aff6512e835ecced --project pmt-app-jovyan , !mlrun logs c9c1eb7518124cb2aff6512e835ecced --project pmt-app-jovyan
> 2021-06-27 12:16:20,896 [info] run executed, status=completed
In [13]:
# Show what the local run logged: a mapping of artifact name to store URI.
fetch_data_run.outputs
Out[13]:
{'pmt-app-dataset': 'store://artifacts/pmt-app-jovyan/model-pmt-fetch_data_pmt-app-dataset:c9c1eb7518124cb2aff6512e835ecced'}

Run on the Cluster

In [14]:
from mlrun.platforms import auto_mount
# Apply the mount configuration returned by auto_mount() to the function
# (presumably so job pods share the notebook's storage — verify for your
# platform), then trigger the remote image build.
model_pmt_func.apply(auto_mount())
model_pmt_func.deploy()
> 2021-06-27 12:16:28,114 [info] starting remote build, image: .abhayrpatel10/func-pmt-app-jovyan-model-pmt:latest
E0627 12:16:40.446443       1 aws_credentials.go:77] while getting AWS credentials NoCredentialProviders: no valid providers in chain. Deprecated.
	For verbose messaging see aws.Config.CredentialsChainVerboseErrors
INFO[0004] Retrieving image manifest mlrun/mlrun:0.6.4-rc7 
INFO[0008] Retrieving image manifest mlrun/mlrun:0.6.4-rc7 
INFO[0011] Built cross stage deps: map[]                
INFO[0011] Retrieving image manifest mlrun/mlrun:0.6.4-rc7 
INFO[0014] Retrieving image manifest mlrun/mlrun:0.6.4-rc7 
INFO[0018] Executing 0 build triggers                   
INFO[0018] Unpacking rootfs as cmd RUN python -m pip install scikit-learn numpy pandas requires it. 
INFO[0135] RUN python -m pip install scikit-learn numpy pandas 
INFO[0135] Taking snapshot of full filesystem...        
INFO[0160] cmd: /bin/sh                                 
INFO[0160] args: [-c python -m pip install scikit-learn numpy pandas] 
INFO[0160] Running: [/bin/sh -c python -m pip install scikit-learn numpy pandas] 
Requirement already satisfied: scikit-learn in /usr/local/lib/python3.7/site-packages (0.23.2)
Requirement already satisfied: numpy in /usr/local/lib/python3.7/site-packages (1.19.5)
Requirement already satisfied: pandas in /usr/local/lib/python3.7/site-packages (1.2.4)
Requirement already satisfied: scipy>=0.19.1 in /usr/local/lib/python3.7/site-packages (from scikit-learn) (1.6.3)
Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.7/site-packages (from scikit-learn) (1.0.1)
Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.7/site-packages (from scikit-learn) (2.1.0)
Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/site-packages (from pandas) (2.8.1)
Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/site-packages (from pandas) (2021.1)
Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/site-packages (from python-dateutil>=2.7.3->pandas) (1.16.0)
WARNING: You are using pip version 20.2.4; however, version 21.1.3 is available.
You should consider upgrading via the '/usr/local/bin/python -m pip install --upgrade pip' command.
INFO[0163] Taking snapshot of full filesystem...        
Out[14]:
True
In [15]:
# Run fetch_data again, this time without local=True, so it executes as a
# job pod on the cluster. This rebinds fetch_data_run to the cluster run,
# whose outputs feed the following steps.
fetch_data_run = model_pmt_func.run(name='fetch_data',
                               handler='fetch_data',
                               inputs={'pmt_records_path': pmt_records_csv_path})
> 2021-06-27 12:19:39,216 [info] starting run fetch_data uid=8c767330a1854d368677abd357df704d DB=http://mlrun-api:8080
> 2021-06-27 12:19:39,284 [info] Job is running in the background, pod: fetch-data-2qdnt
> 2021-06-27 12:20:32,784 [info] Saving datasets to /home/jovyan/data/data ...
> 2021-06-27 12:20:33,496 [info] run executed, status=completed
final state: completed
project uid iter start state name labels inputs parameters results artifacts
pmt-app-jovyan 0 Jun 27 12:20:20 completed fetch_data
kind=job
owner=jovyan
host=fetch-data-2qdnt
pmt_records_path
pmt-app-dataset
to track results use .show() or .logs() or in CLI: 
!mlrun get run 8c767330a1854d368677abd357df704d --project pmt-app-jovyan , !mlrun logs 8c767330a1854d368677abd357df704d --project pmt-app-jovyan
> 2021-06-27 12:20:40,627 [info] run executed, status=completed
In [16]:
# Store URI of the dataset logged by the cluster run; it is the input of the
# transform step below.
fetch_data_run.outputs['pmt-app-dataset']
Out[16]:
'store://artifacts/pmt-app-jovyan/fetch_data_pmt-app-dataset:8c767330a1854d368677abd357df704d'

Transform the Dataset

In [17]:
# Run the transform_dataset handler on the cluster, feeding it the dataset
# artifact produced by the fetch_data run.
transform_dataset_run = model_pmt_func.run(name='transform_dataset',
                                      handler='transform_dataset',
                                      inputs={'pmt_records_path': fetch_data_run.outputs['pmt-app-dataset']})
> 2021-06-27 12:23:10,394 [info] starting run transform_dataset uid=947e71d7c22f4c21acec73cf9cd2df3b DB=http://mlrun-api:8080
> 2021-06-27 12:23:11,019 [info] Job is running in the background, pod: transform-dataset-r9sl7
> 2021-06-27 12:23:26,323 [info] Begin datasets transform
> 2021-06-27 12:23:27,394 [info] End dataset transform
> 2021-06-27 12:23:27,421 [info] run executed, status=completed
final state: completed
project uid iter start state name labels inputs parameters results artifacts
pmt-app-jovyan 0 Jun 27 12:23:26 completed transform_dataset
kind=job
owner=jovyan
host=transform-dataset-r9sl7
pmt_records_path
pmt-app-dataset-transformed
to track results use .show() or .logs() or in CLI: 
!mlrun get run 947e71d7c22f4c21acec73cf9cd2df3b --project pmt-app-jovyan , !mlrun logs 947e71d7c22f4c21acec73cf9cd2df3b --project pmt-app-jovyan
> 2021-06-27 12:23:31,290 [info] run executed, status=completed
In [18]:
# Show the artifacts logged by the transform run (artifact name -> store URI).
transform_dataset_run.outputs
Out[18]:
{'pmt-app-dataset-transformed': 'store://artifacts/pmt-app-jovyan/transform_dataset_pmt-app-dataset-transformed:947e71d7c22f4c21acec73cf9cd2df3b'}

Train Model

In [19]:
# Run the train_model handler on the cluster, using the transformed dataset
# artifact as its `input_ds` input.
train_model_run = model_pmt_func.run(name='train_model',
                                handler='train_model',
                                inputs={'input_ds': transform_dataset_run.outputs['pmt-app-dataset-transformed']})
> 2021-06-27 12:23:37,274 [info] starting run train_model uid=c9e3fcddb5ed42ab85f1709c7780e2a4 DB=http://mlrun-api:8080
> 2021-06-27 12:23:37,349 [info] Job is running in the background, pod: train-model-p279l
> 2021-06-27 12:23:50,911 [info] Begin training
> 2021-06-27 12:23:55,909 [info] End training
> 2021-06-27 12:23:55,920 [info] run executed, status=completed
final state: completed
project uid iter start state name labels inputs parameters results artifacts
pmt-app-jovyan 0 Jun 27 12:23:50 completed train_model
kind=job
owner=jovyan
host=train-model-p279l
input_ds
ModelPMT
to track results use .show() or .logs() or in CLI: 
!mlrun get run c9e3fcddb5ed42ab85f1709c7780e2a4 --project pmt-app-jovyan , !mlrun logs c9e3fcddb5ed42ab85f1709c7780e2a4 --project pmt-app-jovyan
> 2021-06-27 12:23:58,760 [info] run executed, status=completed
In [23]:
# Store URI of the trained model artifact; the serving cell loads the model
# from this URI.
train_model_run.outputs['ModelPMT']
Out[23]:
'store://artifacts/pmt-app-jovyan/train_model_ModelPMT:c9e3fcddb5ed42ab85f1709c7780e2a4'

Serving

In [25]:
from mlrun import import_function
from mlrun.platforms import auto_mount


# Import the v2 model-server function from the function hub and apply the
# auto_mount() configuration to it.
serve = import_function('hub://v2_model_server').apply(auto_mount())
# Name under which the model is registered on the server; it also appears in
# the inference URL path (v2/models/<name>/infer).
model_name='PMTModel'
# Register the trained model artifact with the server.
serve.add_model(model_name, model_path=train_model_run.outputs['ModelPMT'])
# Deploy the serving function; the recorded run logged the deployed address.
addr = serve.deploy()
> 2021-06-27 13:14:42,890 [info] Starting remote function deploy
2021-06-27 13:14:44  (info) Deploying function
2021-06-27 13:14:44  (info) Building
2021-06-27 13:14:44  (info) Staging files and preparing base images
2021-06-27 13:14:44  (info) Building processor image
2021-06-27 13:16:19  (info) Build complete
> 2021-06-27 13:16:39,541 [info] function deployed, address=192.168.65.4:30686
In [31]:
# Test the model
In [30]:
import json

# One sample household as a single row of feature values
# (presumably in the column order used for training — confirm against train_model).
inputs = [[190000,0,3,0,1,1,0,0,0,1,1,0,0,0,0,1,1,1,1,10,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,10,0,10,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,1,0,43,100,1849,1,100,0,1,0,100,1849]]
my_data = json.dumps({'inputs': inputs})
# Build the endpoint from model_name so it cannot drift from the name the
# model was registered under in the serving cell.
serve.invoke(f'v2/models/{model_name}/infer', my_data)
Out[30]:
{'id': '514a9556-0d45-458e-8a75-9898e90b6534',
 'model_name': 'PMTModel',
 'outputs': [4]}
In [ ]: