[Solved] How to efficiently compare two maps?

[ad_1]

Since your files are sorted, you don’t have to store them in memory or parse them beyond reading them line by line. Walk both sequences in a single pass, always advancing whichever one currently has the smaller element:

def count_equal(a, b):
    """
    Counts the number of values that are equal in two sorted iterables.

    Both iterables must yield mutually comparable values in ascending
    order. Each input is read at most once, front to back, and reading
    stops as soon as either input is exhausted. An empty input gives 0.

    >>> odds = [1, 3, 5, 7, 9, 11, 13, 15]
    >>> primes = [2, 3, 5, 7, 11, 13]
    >>> count_equal(odds, primes)
    5
    >>> count_equal([], primes)
    0
    """
    return _count_equal(iter(a), iter(b))


def _count_equal(a, b):
    """
    Counts matching values between two sorted iterators in one pass.

    ``a`` and ``b`` must be iterators (they are advanced with ``next``).
    A matched pair consumes one element from each side, so repeated
    values are counted once per pair.
    """
    c = 0

    try:
        # Fetch the first elements inside the ``try`` so that an empty
        # input produces a count of 0 instead of leaking StopIteration
        # to the caller.
        x = next(a)
        y = next(b)

        while True:
            # Advance whichever side is behind until neither is smaller.
            while x < y:
                x = next(a)

            while y < x:
                y = next(b)

            if x == y:
                c += 1
                x = next(a)
                y = next(b)
    except StopIteration:
        # Either side running out means no further matches are possible.
        return c

You can also count the lines in each file separately during that same pass:

from __future__ import division


class CountingIterable:
    """
    Wraps an iterable and records how many items have been yielded.

    ``count`` is 0 before any iteration starts, tracks the number of items
    handed out while an iteration is in progress, and equals the total
    number of items once the iteration has run to exhaustion.
    """

    def __init__(self, iterable):
        self.iterable = iterable
        # Defined up front so reading ``count`` before (or part-way
        # through) an iteration does not raise AttributeError.
        self.count = 0

    def __iter__(self):
        count = 0
        # Reset when a new iteration actually starts running.
        self.count = count

        for x in self.iterable:
            count += 1
            self.count = count
            yield x


with open('file1.txt', 'r') as a, open('file2.txt', 'r') as b:
    # Wrap each file so the number of lines read from it is recorded.
    a_counter = CountingIterable(a)
    b_counter = CountingIterable(b)

    a_iterator = iter(a_counter)
    b_iterator = iter(b_counter)

    # Number of lines the two sorted files have in common.
    n = count_equal(a_iterator, b_iterator)

    # count_equal stops once either input runs out, so drain whatever is
    # left on both sides to let each counter see its whole file.
    for leftover in (a_iterator, b_iterator):
        for _ in leftover:
            pass

    # Fraction of shared lines relative to the longer file.
    longer = max(a_counter.count, b_counter.count)
    result = n / longer

1

[ad_2]

solved How to efficiently compare two maps?