Skip to main content

Nested folders and python

Nested Folders

Sections can be organized in a hierarchical structure, starting with the release of Tator 1.3. To accomplish this, a new path field was introduced to the Tator Section object. The path field determines the layout of sections using named identifiers for each section in a particular sequence.

Note: Sections is the Tator API term, but folders are used synonymously here as they represent folders containing media or subfolders.

Section Name

The section’s name field dictates what is displayed in Tator. The only restriction the field value has is that it must contain a letter or digit. Unlike previous versions of Tator, the name field does not need to be unique.

If the name field is “Folder A”, Tator will display “Folder A” as its title. If the name field is “Folder 1.2”, Tator will display “Folder 1.2” as its title.

Section Path

The path field contains the parent folder’s path appended with a formatted version of the section’s name. If the section does not have a parent, the section is assumed to be a top-level folder. A parent to a section is denoted using a period in between the formatted names. Here is an example of the section paths and names for the given nested folder layout:

Section ID | Name | Path | Parent Section Path
100 | A | A | None
101 | B | A.B | A
102 | C | A.B.C | A.B

The path value may only contain letters, digits, underscores, or periods. Here is a Python example of how to convert the desired section name to its corresponding path.

import re

# Section name exactly as it should be displayed in Tator.
name = "Folder A"
# Every character that is not a letter, digit, or underscore becomes an
# underscore. Periods are replaced as well, because in a full path they are
# reserved for separating a parent's path from its child's formatted name.
path = re.sub(r'[^A-Za-z0-9_]', '_', name)

In the example above, the path field is “Folder_A”, where the space is replaced by an underscore. Here is a more complex example highlighting the path formatting and structure.

Section ID | Name | Path | Parent Section Path
1000 | Folder A | Folder_A | None
1001 | Folder A – 1.2 | Folder_A.Folder_A___1_2 | Folder_A
1002 | Subfolder 1 | Folder_A.Folder_A___1_2.Subfolder_1 | Folder_A.Folder_A___1_2
1003 | Subfolder 2 | Folder_A.Folder_A___1_2.Subfolder_2 | Folder_A.Folder_A___1_2
1004 | Folder A – 1.3 | Folder_A.Folder_A___1_3 | Folder_A
1005 | Subfolder 1 | Folder_A.Folder_A___1_3.Subfolder_1 | Folder_A.Folder_A___1_3

Finally, the path field must be unique. The section can share the same name as other sections, but there cannot be duplicate paths within the same project. Consequently, a section cannot have the same name as another section if they have the same parent.

Python Example: Upload Directory of Videos

In this example, we will assume that we are ingesting a set of media to Tator into a set of new nested sections mimicking the on-disk directory layout. Of note, this tool makes use of a new section_id parameter to denote which section to place media into. As a result, it is best practice to create the sections first, and then upload the media.

""" Tool used to ingest media into Tator from a local directory structure.
"""

import argparse
import glob
import json
import os
import re
import uuid
import yaml

import tator

def parse_args(argv: "list[str] | None" = None) -> argparse.Namespace:
    """ Parse script arguments

    Args:
        argv: Argument strings to parse. When left as ``None`` argparse reads
            the arguments from the command line, which is what the script
            itself relies on; passing a list allows programmatic use.

    Returns:
        Namespace with the attributes ``host``, ``token``, ``tator_config``,
        ``dir``, ``top_level_section_id`` and ``video_ext``.
    """

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawTextHelpFormatter,
    )
    parser.add_argument("--host", type=str, default="https://cloud.tator.io")
    parser.add_argument("--token", type=str, required=True)
    parser.add_argument(
        "--tator-config", type=str, required=True,
        help=".yaml file of the Tator configuration")
    parser.add_argument(
        "--dir", type=str, required=True,
        help="Directory containing media to ingest")
    parser.add_argument(
        "--top-level-section-id", type=int, required=True,
        help="Top level section to ingest media into")
    # NOTE: argparse exposes this flag as ``args.video_ext``.
    parser.add_argument(
        "--video-ext", type=str, default=".mp4",
        help="Extension of video files to ingest")
    return parser.parse_args(argv)

def _build_directory_structure(video_file_paths: list, start_path: str) -> dict:
    """ Nest the path components of each video below ``start_path`` into a dict tree

    Directories map to a dict of their children; video files are leaves that
    map to an empty dict. Paths that do not begin with ``start_path`` are ignored.
    """

    start_path_parts = start_path.split(os.sep)
    prefix_len = len(start_path_parts)
    directory_structure = {}
    for path in video_file_paths:
        parts = path.split(os.sep)
        if parts[:prefix_len] != start_path_parts:
            continue
        current_level = directory_structure
        for part in parts[prefix_len:]:
            current_level = current_level.setdefault(part, {})
    return directory_structure


def _build_section_specs(directory_structure: dict, parent_section_path: str, local_parent_path: str) -> list:
    """ Walk the directory tree and return one section spec per directory, parents first

    Each entry is ``{"folder": <local directory path>, "spec": <section spec>}``.
    """

    section_specs = []
    for folder_name, subfolders in directory_structure.items():
        # Leaves of the tree are the video files themselves: every directory in
        # the tree is on the way to at least one video, so it always has children.
        if not subfolders:
            continue
        sanitized_folder_name = re.sub(r"[^A-Za-z0-9_]", "_", folder_name)
        new_section_path = f"{parent_section_path}.{sanitized_folder_name}"
        local_path = local_parent_path + os.sep + folder_name
        section_specs.append({
            "folder": local_path,
            "spec": {
                "name": folder_name,
                "path": new_section_path,
                "tator_user_section": str(uuid.uuid4()),
                "visible": True
            }
        })
        section_specs.extend(
            _build_section_specs(subfolders, new_section_path, local_path))
    return section_specs


def script_main() -> None:
    """ Script's main function

    Finds the videos below ``--dir``, mirrors the containing sub-directories
    as new Tator sections beneath the ``--top-level-section-id`` section, and
    then uploads each video into the section that matches its directory.

    Two review files are written to the current working directory before
    anything is created in Tator: ``directory_structure.json`` and
    ``new_section_specs.json``. Nothing is created unless the user confirms.
    """

    args = parse_args()

    # Setup connection to Tator
    # The configuration file must provide the "project" and "video_type" keys.
    with open(args.tator_config, "r") as file_handle:
        tator_config = yaml.safe_load(file_handle)

    tator_api = tator.get_api(host=args.host, token=args.token)

    # Grab the local files to ingest
    # The "--video-ext" flag is stored by argparse as ``video_ext``.
    video_extension = args.video_ext
    # With recursive globbing "**" may match zero directories, so this pattern
    # selects videos that sit at least one sub-directory below --dir; videos
    # placed directly in --dir are not matched.
    search_str = os.path.join(args.dir, f"**/*/*{video_extension}")
    # Keep regular files only: a directory whose name ends with the extension
    # would otherwise be treated as a video.
    video_file_paths = [
        path for path in glob.glob(search_str, recursive=True)
        if os.path.isfile(path)
    ]
    print(f"Found {len(video_file_paths)} video files to ingest.")

    # We want to create the folders in Tator that match the local file structure
    # The top level section ID will match the provided parent directory, so subfolders
    # in the parent directory will result in new sections in Tator.
    start_path = args.dir
    if start_path.endswith(os.sep):
        start_path = start_path[:-1]
    directory_structure = _build_directory_structure(video_file_paths, start_path)

    # Output the directory structure to a json file for review
    with open("directory_structure.json", "w") as file_handle:
        json.dump(directory_structure, file_handle, indent=4)

    # With the hierarchy built, we can now create the sections in Tator by recursively walking the structure.
    parent_section = tator_api.get_section(id=args.top_level_section_id)
    new_section_specs = _build_section_specs(
        directory_structure, parent_section.path, start_path)

    # Output the specs to a json file for review
    with open("new_section_specs.json", "w") as file_handle:
        json.dump(new_section_specs, file_handle, indent=4)

    # Ask the user if they wish to proceed with the new sections to create
    # Any answer other than y/yes (case-insensitive) aborts without changes.
    answer = input("Do you wish to proceed with creating the new sections/media? (Y/n): ")
    if answer.strip().lower() not in ("y", "yes"):
        print("fin.")
        return

    # Create the new sections in Tator
    # Specs are ordered parents-first, so a parent exists before its children.
    folder_to_section_map = {}
    for spec in new_section_specs:
        response = tator_api.create_section(
            project=tator_config["project"],
            section_spec=spec["spec"])
        folder_to_section_map[spec["folder"]] = response.id

    # Now that the sections are created, we can ingest the media and place them
    # in the appropriate folder. We will use the section ID to place the media.
    total_videos = len(video_file_paths)
    for index, local_file_path in enumerate(video_file_paths):
        folder = os.path.dirname(local_file_path)
        section_id = folder_to_section_map[folder]
        for progress, response in tator.util.upload_media(
                api=tator_api,
                type_id=tator_config["video_type"],
                path=local_file_path,
                section_id=section_id):
            print(f"[{index+1}/{total_videos}] {local_file_path} - {progress}")
        print(response)

# Run the ingest only when this file is executed as a script, not on import.
if __name__ == "__main__":
    script_main()