Skip to content
LC-1948 Hard LeetCode

1948. Delete Duplicate Folders in System

Read the full problem statement on LeetCode.
Difficulty: hard Acceptance: 54% Topics: Array, Hash Table, String, Trie, Hash Function
View full problem on LeetCode
Reference solution (spoiler · python)
# Time:  O(n * m * l + tlogt + l * t), m is the max number of folders in a path,
#                                    , n is the number of paths
#                                    , l is the max length of folder name
#                                    , t is the size of trie
# Space: O(l * t)

import collections


class Solution(object):
    def deleteDuplicateFolder(self, paths):
        """
        :type paths: List[List[str]]
        :rtype: List[List[str]]
        """
        def mark(node, lookup, node_ids):
            id_pairs = []
            for subfolder_id, child in node.iteritems():
                if child == "_del":
                    continue
                id_pairs.append((subfolder_id, mark(child, lookup, node_ids)))
            id_pairs.sort()
            node_id = node_ids[tuple(id_pairs)]
            if node_id:
                if node_id in lookup:
                    lookup[node_id]["_del"]
                    node["_del"]
                else:
                    lookup[node_id] = node
            return node_id

        def sweep(node, id_folders, path, result):
            if path:
                result.append([id_folders[i] for i in path])
            for subfolder_id, child in node.iteritems():
                if "_del" in child:
                    continue
                path.append(subfolder_id)
                sweep(child, id_folders, path, result)
                path.pop()

        _trie = lambda: collections.defaultdict(_trie)
        trie = _trie()
        folder_ids = collections.defaultdict()
        folder_ids.default_factory = folder_ids.__len__
        id_folders = {}
        for path in paths:
            node = trie
            for folder in path:
                if folder_ids[folder] not in id_folders:
                    id_folders[folder_ids[folder]] = folder
                node = node[folder_ids[folder]]
        node_ids = collections.defaultdict()
        node_ids.default_factory = node_ids.__len__
        mark(trie, {}, node_ids)
        result = []
        sweep(trie, id_folders, [], result)
        return result


# Time:  O(n * m * l + l * tlogt + l * t^2), m is the max number of folders in a path,
#                                          , n is the number of paths
#                                          , l is the max length of folder name
#                                          , t is the size of trie
# Space: O(l * t^2)
import collections


class Solution2(object):
    def deleteDuplicateFolder(self, paths):
        """
        :type paths: List[List[str]]
        :rtype: List[List[str]]
        """
        def mark(node, lookup):
            serialized_tree = "(" + "".join(subfolder + mark(child, lookup) for subfolder, child in sorted(node.iteritems()) if child != "_del") + ")"
            if serialized_tree != "()":
                if serialized_tree in lookup:
                    lookup[serialized_tree]["_del"]
                    node["_del"]
                else:
                    lookup[serialized_tree] = node
            return serialized_tree
        
        def sweep(node, path, result):
            if path:
                result.append(path[:])
            for subfolder, child in node.iteritems():
                if "_del" in child:
                    continue
                path.append(subfolder)
                sweep(child, path, result)
                path.pop()

        _trie = lambda: collections.defaultdict(_trie)
        trie = _trie()
        for path in paths:
            reduce(dict.__getitem__, path, trie)
        mark(trie, {})
        result = []
        sweep(trie, [], result)
        return result

Solution from kamyu104/LeetCode-Solutions · MIT