Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 29 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,13 +88,13 @@ Upload prepared data to Comet.ml? [y/N] y
# Start uploading data to Comet.ml
100%|███████████████████████████████████████████████████████████████████████| 6/6 [01:00<00:00, 15s/it]
Explore your experiment data on Comet.ml with the following links:
- https://www.comet.ml/kstewart/mlflow-default-2bacc9?loginToken=NjKgD6f9ZuZWeudP76sDPHx9j
- https://www.comet.ml/kstewart/mlflow-keras-experiment-2bacc9?loginToken=NjKgD6f9ZuZWeudP76sDPHx9j
- https://www.comet.ml/kstewart/mlflow-tensorflow-keras-experiment-2bacc9?loginToken=NjKgD6f9ZuZWeudP76sDPHx9j
Get deeper instrumentation by adding Comet SDK to your project: https://comet.ml/docs/python-sdk/mlflow/
- https://www.comet.com/kstewart/mlflow-default-2bacc9?loginToken=NjKgD6f9ZuZWeudP76sDPHx9j
- https://www.comet.com/kstewart/mlflow-keras-experiment-2bacc9?loginToken=NjKgD6f9ZuZWeudP76sDPHx9j
- https://www.comet.com/kstewart/mlflow-tensorflow-keras-experiment-2bacc9?loginToken=NjKgD6f9ZuZWeudP76sDPHx9j
Get deeper instrumentation by adding Comet SDK to your project: https://comet.com/docs/python-sdk/mlflow/


If you need support, you can contact us at http://chat.comet.ml/ or https://comet.ml/docs/quick-start/#getting-support
If you need support, you can contact us at http://chat.comet.com/ or https://comet.com/docs/quick-start/#getting-support
```


Expand Down Expand Up @@ -122,6 +122,30 @@ Or with a MLFlow server:
comet_for_mlflow --mlflow-store-uri http://localhost:5000
```

### Authenticated MLflow stores (Databricks, etc.)

For authenticated MLflow stores like Databricks, you need to set authentication environment variables:

**For Databricks:**
```bash
export DATABRICKS_TOKEN=your_databricks_personal_access_token
comet_for_mlflow --mlflow-store-uri https://your-workspace.cloud.databricks.com
```

To generate a Databricks personal access token:
1. Click on your user profile icon in the top-right corner of your Databricks workspace
2. Select **User Settings**
3. Go to the **Access Tokens** tab
4. Click **Generate New Token**

**For other authenticated MLflow servers:**
```bash
# Username/password authentication
export MLFLOW_TRACKING_USERNAME=your_username
export MLFLOW_TRACKING_PASSWORD=your_password
comet_for_mlflow --mlflow-store-uri https://your-mlflow-server.com
```

## Importing MLFlow artifacts stored remotely

If your MLFlow runs have artifacts stored remotely (in any of the supported remote artifact stores https://www.mlflow.org/docs/latest/tracking.html#artifact-stores), you need to configure your environment the same way as when you ran those experiments. For example, with a local Minio server:
Expand Down
113 changes: 92 additions & 21 deletions comet_for_mlflow/comet_for_mlflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,14 @@
from os.path import abspath
from zipfile import ZipFile

from comet_ml import API
from comet_ml.comet import format_url
from comet_ml import API, get_comet_api_client
from comet_ml.config import get_api_key, get_config
from comet_ml.connection import Reporting, get_comet_api_client, url_join
from comet_ml.connection import Reporting
from comet_ml.exceptions import CometRestApiException
from comet_ml.offline import upload_single_offline_experiment
from comet_ml.utils import merge_url, url_join
from mlflow.entities.run_tag import RunTag
from mlflow.exceptions import RestException
from mlflow.tracking import _get_store
from mlflow.tracking._model_registry.utils import _get_store as get_model_registry_store
from mlflow.tracking.registry import UnsupportedModelRegistryStoreURIException
Expand Down Expand Up @@ -126,13 +127,40 @@ def __init__(
output_dir = tempfile.mkdtemp()

# MLFlow conversion
self.store = _get_store(mlflow_store_uri)
try:
self.store = _get_store(mlflow_store_uri)
except RestException as e:
if self._is_authentication_error(e):
self._log_authentication_error(
mlflow_store_uri, "connecting to MLflow store"
)
raise
except Exception as e:
if self._is_authentication_error(e):
self._log_authentication_error(
mlflow_store_uri, "connecting to MLflow store"
)
raise

try:
self.model_registry_store = get_model_registry_store(mlflow_store_uri)
except UnsupportedModelRegistryStoreURIException:
self.model_registry_store = None

self.mlflow_experiments = search_mlflow_store_experiments(self.store)
try:
self.mlflow_experiments = search_mlflow_store_experiments(self.store)
except RestException as e:
if self._is_authentication_error(e):
self._log_authentication_error(
mlflow_store_uri, "accessing MLflow experiments"
)
raise
except Exception as e:
if self._is_authentication_error(e):
self._log_authentication_error(
mlflow_store_uri, "accessing MLflow experiments"
)
raise
self.len_experiments = len(self.mlflow_experiments) # We start counting at 0

self.summary = {
Expand Down Expand Up @@ -206,7 +234,7 @@ def prepare(self):
LOGGER.info(
tabulate(
table,
headers=["MLFlow name:", "Comet.ml name:", "Prepared count:"],
headers=["MLFlow name:", "Comet ML name:", "Prepared count:"],
tablefmt="presto",
)
)
Expand All @@ -217,7 +245,7 @@ def prepare(self):
# Upload or not?
print("")
if self.answer is None:
upload = input("Upload prepared data to Comet.ml? [y/N] ") in ("Y", "y")
upload = input("Upload prepared data to Comet ML? [y/N] ") in ("Y", "y")
else:
upload = self.answer
print("")
Expand All @@ -232,8 +260,8 @@ def prepare(self):

LOGGER.info("")
LOGGER.info(
"""If you need support, you can contact us at http://chat.comet.ml/"""
""" or https://comet.ml/docs/quick-start/#getting-support"""
"""If you need support, you can contact us at http://chat.comet.com/"""
""" or https://comet.com/docs/quick-start/#getting-support"""
)
LOGGER.info("")

Expand Down Expand Up @@ -319,11 +347,11 @@ def prepare_single_mlflow_run(self, run, original_experiment_name):
base_url = url_join(
self.api_client.server_url, "/api/experiment/redirect"
)
tags["mlflow.parentRunUrl"] = format_url(
base_url, experimentKey=tags["mlflow.parentRunId"]
tags["mlflow.parentRunUrl"] = merge_url(
base_url, {"experimentKey": tags["mlflow.parentRunId"]}
)

# Save the original MLFlow experiment name too as Comet.ml project might
# Save the original MLFlow experiment name too as Comet.com project might
# get renamed
tags["mlflow.experimentName"] = original_experiment_name

Expand Down Expand Up @@ -449,7 +477,7 @@ def get_model_prefixes(self, artifact_list):
return models

def upload(self, prepared_data):
LOGGER.info("# Start uploading data to Comet.ml")
LOGGER.info("# Start uploading data to Comet ML")

all_project_names = []

Expand Down Expand Up @@ -494,7 +522,7 @@ def upload(self, prepared_data):

LOGGER.info("")
LOGGER.info(
"Explore your experiment data on Comet.ml with the following links:",
"Explore your experiment data on Comet ML with the following links:",
)
if len(all_project_names) < 6:
for project_name in all_project_names:
Expand All @@ -516,7 +544,7 @@ def upload(self, prepared_data):

LOGGER.info(
"Get deeper instrumentation by adding Comet SDK to your project:"
" https://comet.ml/docs/python-sdk/mlflow/"
" https://comet.com/docs/python-sdk/mlflow/"
)
LOGGER.info("")

Expand Down Expand Up @@ -593,7 +621,7 @@ def get_or_create_comet_project(self, exp):

def create_or_login(self):
auth_api_client = get_comet_api_client(None)
LOGGER.info("Please create a free Comet account with your email.")
LOGGER.info("Please create a free Comet.com account with your email.")
if self.email is None:
email = input("Email: ")
print("")
Expand Down Expand Up @@ -627,23 +655,23 @@ def create_or_login(self):
Reporting.report("mlflow_new_user", api_key=new_account["apiKey"])

LOGGER.info(
"A Comet.ml account has been created for you and an email was sent to"
"A Comet.com account has been created for you and an email was sent to"
" you to setup your password later."
)
save_api_key(new_account["apiKey"])
LOGGER.info(
"Your Comet API Key has been saved to ~/.comet.ini, it is also"
" available on your Comet.ml dashboard."
"Your Comet API Key has been saved to ~/.comet.config, it is also"
" available on your Comet.com dashboard."
)
return (
new_account["apiKey"],
new_account["token"],
)
else:
LOGGER.info(
"An account already exists for this account, please input your API Key"
"An account already exists for this email, please input your API Key"
" below (you can find it in your Settings page,"
" https://comet.ml/docs/quick-start/#getting-your-comet-api-key):"
" https://comet.com/docs/quick-start/#getting-your-comet-api-key):"
)
api_key = input("API Key: ")

Expand All @@ -663,3 +691,46 @@ def get_api_key_or_login(self, api_key):
Reporting.report("mlflow_existing_user", api_key=api_key)

return (api_key, None)

def _is_authentication_error(self, exception):
"""Check if an exception is an authentication error (401)."""
error_msg = str(exception)
status_code = None

# Check HTTP status code for RestException
if hasattr(exception, "get_http_status_code"):
status_code = exception.get_http_status_code()

return (
status_code == 401
or "401" in error_msg
or "Credential" in error_msg
or "authentication" in error_msg.lower()
)

def _log_authentication_error(self, mlflow_store_uri, context):
"""Log troubleshooting hints for an MLflow authentication failure.

Every line is emitted through ``LOGGER.error``. Nothing is raised or
returned here.

Args:
    mlflow_store_uri: The MLflow store URI that was being accessed. When
        it is truthy and contains "databricks" (case-insensitive), the
        Databricks-specific instructions are logged; otherwise the
        generic environment-variable hints are logged.
    context: Short description of the operation that failed; it is
        interpolated into the "detected when %s." message.
"""
# Empty records act as blank separator lines around the message.
LOGGER.error("")
LOGGER.error("MLflow authentication error detected when %s.", context)
LOGGER.error("")
# A falsy URI (None or "") short-circuits to the generic branch before
# .lower() is called.
if mlflow_store_uri and "databricks" in mlflow_store_uri.lower():
LOGGER.error(
"For Databricks MLflow stores, you need to set the "
"DATABRICKS_TOKEN environment variable:"
)
LOGGER.error(
" export DATABRICKS_TOKEN=your_databricks_personal_access_token"
)
LOGGER.error("")
# Step-by-step instructions for creating the token.
LOGGER.error("You can generate a token by:")
LOGGER.error(" 1. Click on your user profile icon in the top-right corner")
LOGGER.error(" 2. Select 'User Settings'")
LOGGER.error(" 3. Go to the 'Access Tokens' tab")
LOGGER.error(" 4. Click 'Generate New Token'")
else:
# Generic guidance: list the environment variables that may be needed.
LOGGER.error("For authenticated MLflow stores, you may need to set:")
LOGGER.error(" - DATABRICKS_TOKEN (for Databricks token-based auth)")
LOGGER.error(
" - MLFLOW_TRACKING_USERNAME and MLFLOW_TRACKING_PASSWORD (for basic auth)"
)
# NOTE(review): indentation is not visible in this view, so it is unclear
# whether this trailing blank line belongs to the else branch only or runs
# after both branches — confirm against the real file.
LOGGER.error("")
2 changes: 1 addition & 1 deletion comet_for_mlflow/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def save_api_key(api_key):
with open(config_path, "wt") as config_file:
config_file.write("# Config file for Comet.ml\n")
config_file.write(
"# For help see https://www.comet.ml/docs/python-sdk/getting-started/\n"
"# For help see https://www.comet.com/docs/python-sdk/getting-started/\n"
)
config_file.write("")

Expand Down
8 changes: 8 additions & 0 deletions examples/keras-example/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,14 @@
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import Tokenizer

# The following MLflow settings are specific to Databricks.
# They should only be used when running in a Databricks environment.
# If running outside Databricks, you may need to set a different tracking URI
# and experiment, or remove these lines entirely.

mlflow.set_tracking_uri("databricks")
mlflow.set_experiment("/Shared/keras_example")

mlflow.tensorflow.autolog()

max_words = 1000
Expand Down
4 changes: 2 additions & 2 deletions tests/test_comet_for_mlflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@
from random import randint, random

import responses
from comet_ml.connection import url_join
from comet_ml.utils import url_join
from mlflow import active_run, end_run, log_artifacts, log_metric, log_param, tracking

from comet_for_mlflow import comet_for_mlflow

CURRENT_DIR = os.path.dirname(os.path.realpath(__file__))

SERVER_ADDRESS = "https://www.comet.ml/clientlib/"
SERVER_ADDRESS = "https://www.comet.com/clientlib/"


def mlflow_example():
Expand Down
Loading