@@ -370,9 +370,27 @@ def use_array_model_card_metadata(metadata_key: str, model_card_key: str):
370
370
if isinstance (model_id , str ):
371
371
if model_id .startswith ("http://" ) or model_id .startswith ("https://" ) or model_id .startswith ("ssh://" ):
372
372
base_model ["repo_url" ] = model_id
373
+
374
+ # Check if Hugging Face ID is present in URL
375
+ if "huggingface.co" in model_id :
376
+ match = re .match (r"https?://huggingface.co/([^/]+/[^/]+)$" , model_id )
377
+ if match :
378
+ model_id_component = match .group (1 )
379
+ model_full_name_component , org_component , basename , finetune , version , size_label = Metadata .get_model_id_components (model_id_component , total_params )
380
+
381
+ # Populate model dictionary with extracted components
382
+ if model_full_name_component is not None :
383
+ base_model ["name" ] = Metadata .id_to_title (model_full_name_component )
384
+ if org_component is not None :
385
+ base_model ["organization" ] = Metadata .id_to_title (org_component )
386
+ if version is not None :
387
+ base_model ["version" ] = version
388
+
373
389
else :
374
390
# Likely a Hugging Face ID
375
391
model_full_name_component , org_component , basename , finetune , version , size_label = Metadata .get_model_id_components (model_id , total_params )
392
+
393
+ # Populate model dictionary with extracted components
376
394
if model_full_name_component is not None :
377
395
base_model ["name" ] = Metadata .id_to_title (model_full_name_component )
378
396
if org_component is not None :
@@ -405,11 +423,29 @@ def use_array_model_card_metadata(metadata_key: str, model_card_key: str):
405
423
# NOTE: model size of base model is assumed to be similar to the size of the current model
406
424
dataset = {}
407
425
if isinstance (dataset_id , str ):
408
- if dataset_id .startswith ("http://" ) or dataset_id . startswith ( "https://" ) or dataset_id . startswith ( "ssh://" ):
426
+ if dataset_id .startswith (( "http://" , "https://" , "ssh://" ) ):
409
427
dataset ["repo_url" ] = dataset_id
428
+
429
+ # Check if Hugging Face ID is present in URL
430
+ if "huggingface.co" in dataset_id :
431
+ match = re .match (r"https?://huggingface.co/([^/]+/[^/]+)$" , dataset_id )
432
+ if match :
433
+ dataset_id_component = match .group (1 )
434
+ dataset_name_component , org_component , basename , finetune , version , size_label = Metadata .get_model_id_components (dataset_id_component , total_params )
435
+
436
+ # Populate dataset dictionary with extracted components
437
+ if dataset_name_component is not None :
438
+ dataset ["name" ] = Metadata .id_to_title (dataset_name_component )
439
+ if org_component is not None :
440
+ dataset ["organization" ] = Metadata .id_to_title (org_component )
441
+ if version is not None :
442
+ dataset ["version" ] = version
443
+
410
444
else :
411
445
# Likely a Hugging Face ID
412
446
dataset_name_component , org_component , basename , finetune , version , size_label = Metadata .get_model_id_components (dataset_id , total_params )
447
+
448
+ # Populate dataset dictionary with extracted components
413
449
if dataset_name_component is not None :
414
450
dataset ["name" ] = Metadata .id_to_title (dataset_name_component )
415
451
if org_component is not None :
@@ -418,6 +454,7 @@ def use_array_model_card_metadata(metadata_key: str, model_card_key: str):
418
454
dataset ["version" ] = version
419
455
if org_component is not None and dataset_name_component is not None :
420
456
dataset ["repo_url" ] = f"https://huggingface.co/{ org_component } /{ dataset_name_component } "
457
+
421
458
elif isinstance (dataset_id , dict ):
422
459
dataset = dataset_id
423
460
else :
0 commit comments