Skip to content

Commit 132e180

Browse files
authored
Merge pull request #11 from comet-ml/dsb/update-mlflow
[NA] Update for modern comet_ml and mlflow
2 parents a572702 + c9b76e7 commit 132e180

File tree

5 files changed

+132
-29
lines changed

5 files changed

+132
-29
lines changed

README.md

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -88,13 +88,13 @@ Upload prepared data to Comet.ml? [y/N] y
8888
# Start uploading data to Comet.ml
8989
100%|███████████████████████████████████████████████████████████████████████| 6/6 [01:00<00:00, 15s/it]
9090
Explore your experiment data on Comet.ml with the following links:
91-
- https://www.comet.ml/kstewart/mlflow-default-2bacc9?loginToken=NjKgD6f9ZuZWeudP76sDPHx9j
92-
- https://www.comet.ml/kstewart/mlflow-keras-experiment-2bacc9?loginToken=NjKgD6f9ZuZWeudP76sDPHx9j
93-
- https://www.comet.ml/kstewart/mlflow-tensorflow-keras-experiment-2bacc9?loginToken=NjKgD6f9ZuZWeudP76sDPHx9j
94-
Get deeper instrumentation by adding Comet SDK to your project: https://comet.ml/docs/python-sdk/mlflow/
91+
- https://www.comet.com/kstewart/mlflow-default-2bacc9?loginToken=NjKgD6f9ZuZWeudP76sDPHx9j
92+
- https://www.comet.com/kstewart/mlflow-keras-experiment-2bacc9?loginToken=NjKgD6f9ZuZWeudP76sDPHx9j
93+
- https://www.comet.com/kstewart/mlflow-tensorflow-keras-experiment-2bacc9?loginToken=NjKgD6f9ZuZWeudP76sDPHx9j
94+
Get deeper instrumentation by adding Comet SDK to your project: https://comet.com/docs/python-sdk/mlflow/
9595
9696
97-
If you need support, you can contact us at http://chat.comet.ml/ or https://comet.ml/docs/quick-start/#getting-support
97+
If you need support, you can contact us at http://chat.comet.com/ or https://comet.com/docs/quick-start/#getting-support
9898
```
9999

100100

@@ -122,6 +122,30 @@ Or with a MLFlow server:
122122
comet_for_mlflow --mlflow-store-uri http://localhost:5000
123123
```
124124

125+
### Authenticated MLflow stores (Databricks, etc.)
126+
127+
For authenticated MLflow stores like Databricks, you need to set authentication environment variables:
128+
129+
**For Databricks:**
130+
```bash
131+
export DATABRICKS_TOKEN=your_databricks_personal_access_token
132+
comet_for_mlflow --mlflow-store-uri https://your-workspace.cloud.databricks.com
133+
```
134+
135+
To generate a Databricks personal access token:
136+
1. Click on your user profile icon in the top-right corner of your Databricks workspace
137+
2. Select **User Settings**
138+
3. Go to the **Access Tokens** tab
139+
4. Click **Generate New Token**
140+
141+
**For other authenticated MLflow servers:**
142+
```bash
143+
# Username/password authentication
144+
export MLFLOW_TRACKING_USERNAME=your_username
145+
export MLFLOW_TRACKING_PASSWORD=your_password
146+
comet_for_mlflow --mlflow-store-uri https://your-mlflow-server.com
147+
```
148+
125149
## Importing MLFlow artifacts stored remotely
126150

127151
If your MLFlow runs have artifacts stored remotely (in any of the supported remote artifact stores https://www.mlflow.org/docs/latest/tracking.html#artifact-stores), you need to configure your environment the same way as when you ran those experiments. For example, with a local Minio server:

comet_for_mlflow/comet_for_mlflow.py

Lines changed: 92 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,14 @@
3333
from os.path import abspath
3434
from zipfile import ZipFile
3535

36-
from comet_ml import API
37-
from comet_ml.comet import format_url
36+
from comet_ml import API, get_comet_api_client
3837
from comet_ml.config import get_api_key, get_config
39-
from comet_ml.connection import Reporting, get_comet_api_client, url_join
38+
from comet_ml.connection import Reporting
4039
from comet_ml.exceptions import CometRestApiException
4140
from comet_ml.offline import upload_single_offline_experiment
41+
from comet_ml.utils import merge_url, url_join
4242
from mlflow.entities.run_tag import RunTag
43+
from mlflow.exceptions import RestException
4344
from mlflow.tracking import _get_store
4445
from mlflow.tracking._model_registry.utils import _get_store as get_model_registry_store
4546
from mlflow.tracking.registry import UnsupportedModelRegistryStoreURIException
@@ -126,13 +127,40 @@ def __init__(
126127
output_dir = tempfile.mkdtemp()
127128

128129
# MLFlow conversion
129-
self.store = _get_store(mlflow_store_uri)
130+
try:
131+
self.store = _get_store(mlflow_store_uri)
132+
except RestException as e:
133+
if self._is_authentication_error(e):
134+
self._log_authentication_error(
135+
mlflow_store_uri, "connecting to MLflow store"
136+
)
137+
raise
138+
except Exception as e:
139+
if self._is_authentication_error(e):
140+
self._log_authentication_error(
141+
mlflow_store_uri, "connecting to MLflow store"
142+
)
143+
raise
144+
130145
try:
131146
self.model_registry_store = get_model_registry_store(mlflow_store_uri)
132147
except UnsupportedModelRegistryStoreURIException:
133148
self.model_registry_store = None
134149

135-
self.mlflow_experiments = search_mlflow_store_experiments(self.store)
150+
try:
151+
self.mlflow_experiments = search_mlflow_store_experiments(self.store)
152+
except RestException as e:
153+
if self._is_authentication_error(e):
154+
self._log_authentication_error(
155+
mlflow_store_uri, "accessing MLflow experiments"
156+
)
157+
raise
158+
except Exception as e:
159+
if self._is_authentication_error(e):
160+
self._log_authentication_error(
161+
mlflow_store_uri, "accessing MLflow experiments"
162+
)
163+
raise
136164
self.len_experiments = len(self.mlflow_experiments) # We start counting at 0
137165

138166
self.summary = {
@@ -206,7 +234,7 @@ def prepare(self):
206234
LOGGER.info(
207235
tabulate(
208236
table,
209-
headers=["MLFlow name:", "Comet.ml name:", "Prepared count:"],
237+
headers=["MLFlow name:", "Comet ML name:", "Prepared count:"],
210238
tablefmt="presto",
211239
)
212240
)
@@ -217,7 +245,7 @@ def prepare(self):
217245
# Upload or not?
218246
print("")
219247
if self.answer is None:
220-
upload = input("Upload prepared data to Comet.ml? [y/N] ") in ("Y", "y")
248+
upload = input("Upload prepared data to Comet ML? [y/N] ") in ("Y", "y")
221249
else:
222250
upload = self.answer
223251
print("")
@@ -232,8 +260,8 @@ def prepare(self):
232260

233261
LOGGER.info("")
234262
LOGGER.info(
235-
"""If you need support, you can contact us at http://chat.comet.ml/"""
236-
""" or https://comet.ml/docs/quick-start/#getting-support"""
263+
"""If you need support, you can contact us at http://chat.comet.com/"""
264+
""" or https://comet.com/docs/quick-start/#getting-support"""
237265
)
238266
LOGGER.info("")
239267

@@ -319,11 +347,11 @@ def prepare_single_mlflow_run(self, run, original_experiment_name):
319347
base_url = url_join(
320348
self.api_client.server_url, "/api/experiment/redirect"
321349
)
322-
tags["mlflow.parentRunUrl"] = format_url(
323-
base_url, experimentKey=tags["mlflow.parentRunId"]
350+
tags["mlflow.parentRunUrl"] = merge_url(
351+
base_url, {"experimentKey": tags["mlflow.parentRunId"]}
324352
)
325353

326-
# Save the original MLFlow experiment name too as Comet.ml project might
354+
# Save the original MLFlow experiment name too as Comet.com project might
327355
# get renamed
328356
tags["mlflow.experimentName"] = original_experiment_name
329357

@@ -449,7 +477,7 @@ def get_model_prefixes(self, artifact_list):
449477
return models
450478

451479
def upload(self, prepared_data):
452-
LOGGER.info("# Start uploading data to Comet.ml")
480+
LOGGER.info("# Start uploading data to Comet ML")
453481

454482
all_project_names = []
455483

@@ -494,7 +522,7 @@ def upload(self, prepared_data):
494522

495523
LOGGER.info("")
496524
LOGGER.info(
497-
"Explore your experiment data on Comet.ml with the following links:",
525+
"Explore your experiment data on Comet ML with the following links:",
498526
)
499527
if len(all_project_names) < 6:
500528
for project_name in all_project_names:
@@ -516,7 +544,7 @@ def upload(self, prepared_data):
516544

517545
LOGGER.info(
518546
"Get deeper instrumentation by adding Comet SDK to your project:"
519-
" https://comet.ml/docs/python-sdk/mlflow/"
547+
" https://comet.com/docs/python-sdk/mlflow/"
520548
)
521549
LOGGER.info("")
522550

@@ -593,7 +621,7 @@ def get_or_create_comet_project(self, exp):
593621

594622
def create_or_login(self):
595623
auth_api_client = get_comet_api_client(None)
596-
LOGGER.info("Please create a free Comet account with your email.")
624+
LOGGER.info("Please create a free Comet.com account with your email.")
597625
if self.email is None:
598626
email = input("Email: ")
599627
print("")
@@ -627,23 +655,23 @@ def create_or_login(self):
627655
Reporting.report("mlflow_new_user", api_key=new_account["apiKey"])
628656

629657
LOGGER.info(
630-
"A Comet.ml account has been created for you and an email was sent to"
658+
"A Comet.com account has been created for you and an email was sent to"
631659
" you to setup your password later."
632660
)
633661
save_api_key(new_account["apiKey"])
634662
LOGGER.info(
635-
"Your Comet API Key has been saved to ~/.comet.ini, it is also"
636-
" available on your Comet.ml dashboard."
663+
"Your Comet API Key has been saved to ~/.comet.config, it is also"
664+
" available on your Comet.com dashboard."
637665
)
638666
return (
639667
new_account["apiKey"],
640668
new_account["token"],
641669
)
642670
else:
643671
LOGGER.info(
644-
"An account already exists for this account, please input your API Key"
672+
"An account already exists for this email, please input your API Key"
645673
" below (you can find it in your Settings page,"
646-
" https://comet.ml/docs/quick-start/#getting-your-comet-api-key):"
674+
" https://comet.com/docs/quick-start/#getting-your-comet-api-key):"
647675
)
648676
api_key = input("API Key: ")
649677

@@ -663,3 +691,46 @@ def get_api_key_or_login(self, api_key):
663691
Reporting.report("mlflow_existing_user", api_key=api_key)
664692

665693
return (api_key, None)
694+
695+
def _is_authentication_error(self, exception):
696+
"""Check if an exception is an authentication error (401)."""
697+
error_msg = str(exception)
698+
status_code = None
699+
700+
# Check HTTP status code for RestException
701+
if hasattr(exception, "get_http_status_code"):
702+
status_code = exception.get_http_status_code()
703+
704+
return (
705+
status_code == 401
706+
or "401" in error_msg
707+
or "Credential" in error_msg
708+
or "authentication" in error_msg.lower()
709+
)
710+
711+
def _log_authentication_error(self, mlflow_store_uri, context):
712+
"""Log helpful error message for MLflow authentication errors."""
713+
LOGGER.error("")
714+
LOGGER.error("MLflow authentication error detected when %s.", context)
715+
LOGGER.error("")
716+
if mlflow_store_uri and "databricks" in mlflow_store_uri.lower():
717+
LOGGER.error(
718+
"For Databricks MLflow stores, you need to set the "
719+
"DATABRICKS_TOKEN environment variable:"
720+
)
721+
LOGGER.error(
722+
" export DATABRICKS_TOKEN=your_databricks_personal_access_token"
723+
)
724+
LOGGER.error("")
725+
LOGGER.error("You can generate a token by:")
726+
LOGGER.error(" 1. Click on your user profile icon in the top-right corner")
727+
LOGGER.error(" 2. Select 'User Settings'")
728+
LOGGER.error(" 3. Go to the 'Access Tokens' tab")
729+
LOGGER.error(" 4. Click 'Generate New Token'")
730+
else:
731+
LOGGER.error("For authenticated MLflow stores, you may need to set:")
732+
LOGGER.error(" - DATABRICKS_TOKEN (for Databricks token-based auth)")
733+
LOGGER.error(
734+
" - MLFLOW_TRACKING_USERNAME and MLFLOW_TRACKING_PASSWORD (for basic auth)"
735+
)
736+
LOGGER.error("")

comet_for_mlflow/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ def save_api_key(api_key):
109109
with open(config_path, "wt") as config_file:
110110
config_file.write("# Config file for Comet.ml\n")
111111
config_file.write(
112-
"# For help see https://www.comet.ml/docs/python-sdk/getting-started/\n"
112+
"# For help see https://www.comet.com/docs/python-sdk/getting-started/\n"
113113
)
114114
config_file.write("")
115115

examples/keras-example/run.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,14 @@
1111
from tensorflow.keras.models import Sequential
1212
from tensorflow.keras.preprocessing.text import Tokenizer
1313

14+
# The following MLflow settings are specific to Databricks.
15+
# They should only be used when running in a Databricks environment.
16+
# If running outside Databricks, you may need to set a different tracking URI
17+
# and experiment, or remove these lines entirely.
18+
19+
mlflow.set_tracking_uri("databricks")
20+
mlflow.set_experiment("/Shared/keras_example")
21+
1422
mlflow.tensorflow.autolog()
1523

1624
max_words = 1000

tests/test_comet_for_mlflow.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,14 @@
88
from random import randint, random
99

1010
import responses
11-
from comet_ml.connection import url_join
11+
from comet_ml.utils import url_join
1212
from mlflow import active_run, end_run, log_artifacts, log_metric, log_param, tracking
1313

1414
from comet_for_mlflow import comet_for_mlflow
1515

1616
CURRENT_DIR = os.path.dirname(os.path.realpath(__file__))
1717

18-
SERVER_ADDRESS = "https://www.comet.ml/clientlib/"
18+
SERVER_ADDRESS = "https://www.comet.com/clientlib/"
1919

2020

2121
def mlflow_example():

0 commit comments

Comments
 (0)