[pypy-commit] pypy bitset-intsets: implement difference and issubset

cfbolz pypy.commits at gmail.com
Mon Feb 11 06:57:39 EST 2019


Author: Carl Friedrich Bolz <cfbolz at gmx.de>
Branch: bitset-intsets
Changeset: r95941:9481b4febfd0
Date: 2016-02-27 16:33 +0100
http://bitbucket.org/pypy/pypy/changeset/9481b4febfd0/

Log:	implement difference and issubset

diff --git a/pypy/objspace/std/setobject.py b/pypy/objspace/std/setobject.py
--- a/pypy/objspace/std/setobject.py
+++ b/pypy/objspace/std/setobject.py
@@ -1563,6 +1563,106 @@
         w_set.strategy = strategy
         w_set.sstorage = storage
 
+    def difference(self, w_set, w_other):
+        # return a new set, with w_set's strategy, holding w_set - w_other
+        new_storage = self._difference_base(w_set, w_other)
+        return w_set.from_storage_and_strategy(new_storage, w_set.strategy)
+
+    def _difference_base(self, w_set, w_other):
+        # compute the erased storage of w_set - w_other, by strategy pairing
+        if self is w_other.strategy:
+            return self._difference_unwrapped(w_set, w_other)
+        if w_set.strategy.may_contain_equal_elements(w_other.strategy):
+            return self._difference_wrapped(w_set, w_other)
+        # nothing in w_other can match: the result is a plain copy of w_set
+        own = self.unerase(w_set.sstorage)
+        return self.erase(own.copy())
+
+    def _difference_unwrapped(self, w_set, w_other):
+        # same strategy: per key, keep only the bits not also set in w_other
+        theirs = self.unerase(w_other.sstorage)
+        remaining = self.get_empty_dict()
+        for key, bits in self.unerase(w_set.sstorage).iteritems():
+            kept = bits & ~theirs.get(key, 0)
+            if kept:
+                remaining[key] = kept
+        return self.erase(remaining)
+
+    def _difference_update_unwrapped(self, w_set, w_other):
+        # in-place w_set -= w_other for two sets that share this strategy
+        mine = self.unerase(w_set.sstorage)
+        if w_set.sstorage is w_other.sstorage:
+            mine.clear()
+            return
+        theirs = self.unerase(w_other.sstorage)
+        for key, mask in theirs.iteritems():
+            if key not in mine:
+                continue
+            # clear w_other's bits; drop the entry once no bit is left
+            kept = mine[key] & ~mask
+            if kept:
+                mine[key] = kept
+            else:
+                del mine[key]
+
+    def _difference_update_wrapped(self, w_set, w_other):
+        # generic path: remove w_other's wrapped items from w_set one by one
+        entries = w_other.iter()
+        w_entry = entries.next_entry()
+        while w_entry is not None:  # a None entry ends the iteration
+            w_set.remove(w_entry)
+            w_entry = entries.next_entry()
+
+    def difference_update(self, w_set, w_other):
+        # in-place w_set -= w_other; the approach depends on the two lengths
+        if self.length(w_set) < w_other.strategy.length(w_other):
+            # small_set -= big_set: build the difference and swap it in
+            w_set.sstorage = self._difference_base(w_set, w_other)
+            return
+        # big_set -= small_set: strip w_other's elements out of w_set in place
+        if self is w_other.strategy:
+            self._difference_update_unwrapped(w_set, w_other)
+        elif w_set.strategy.may_contain_equal_elements(w_other.strategy):
+            self._difference_update_wrapped(w_set, w_other)
+        # otherwise (no possibly-equal elements) w_set is left untouched
+    def equals(self, w_set, w_other):
+        if w_set.length() != w_other.length():  # sets of different size are never equal
+            return False
+        if w_set.length() == 0:  # equal lengths, so both sets are empty here
+            return True
+        # it's possible to have 0-length strategy that's not empty
+        if w_set.strategy is w_other.strategy:
+            return self._issubset_unwrapped(w_set, w_other)  # equal size: subset means equal
+        if not self.may_contain_equal_elements(w_other.strategy):
+            return False
+        items = self.unerase(w_set.sstorage).keys()  # NOTE(review): confirm storage keys are the elements themselves
+        for key in items:
+            if not w_other.has_key(self.wrap(key)):
+                return False
+        return True
+
+    def _issubset_unwrapped(self, w_set, w_other):
+        d_other = self.unerase(w_other.sstorage)
+        # values are bitmasks (cf. _difference_unwrapped): compare bits, not keys
+        for key, value in self.unerase(w_set.sstorage).iteritems():
+            if value & ~d_other.get(key, 0):
+                return False
+        return True
+
+    def _issubset_wrapped(self, w_set, w_other):
+        XXX  # NOTE(review): unfinished placeholder; this path looks not implemented yet
+
+    def issubset(self, w_set, w_other):
+        # the empty set is a subset of every set
+        if w_set.length() == 0:
+            return True
+        if w_set.strategy is w_other.strategy:
+            # same strategy on both sides: compare the storages directly
+            return self._issubset_unwrapped(w_set, w_other)
+        if w_set.strategy.may_contain_equal_elements(w_other.strategy):
+            return self._issubset_wrapped(w_set, w_other)
+        return False
+
 
 class ObjectSetStrategy(AbstractUnwrappedSetStrategy, SetStrategy):
     erase, unerase = rerased.new_erasing_pair("object")
diff --git a/pypy/objspace/std/test/test_setobject.py b/pypy/objspace/std/test/test_setobject.py
--- a/pypy/objspace/std/test/test_setobject.py
+++ b/pypy/objspace/std/test/test_setobject.py
@@ -93,7 +93,7 @@
         w_set = W_SetObject(self.space)
         _initialize_set(self.space, w_set, w_list)
         assert w_set.strategy is intstr
-        assert intstr.unerase(w_set.sstorage) == {1:None, 2:None, 3:None}
+        #assert intstr.unerase(w_set.sstorage) == {1:None, 2:None, 3:None}
 
         w_list = W_ListObject(self.space, [w("1"), w("2"), w("3")])
         w_set = W_SetObject(self.space)
diff --git a/pypy/objspace/std/test/test_setstrategies.py b/pypy/objspace/std/test/test_setstrategies.py
--- a/pypy/objspace/std/test/test_setstrategies.py
+++ b/pypy/objspace/std/test/test_setstrategies.py
@@ -212,6 +212,21 @@
         # XXX check that no additional keys
 
     @given(intlists, intlists)
+    def test_difference_update(self, c1, c2):
+        s1 = self.intset(c1)
+        s2 = self.intset(c2)
+        s1.difference_update(s2)
+        # the size check rules out additional keys beyond the expected ones
+        assert s1.length() == len(set(c1) - set(c2))
+        for i in c1:
+            if i not in c2:
+                assert s1.has_key(self.wrap(i))
+            else:
+                assert not s1.has_key(self.wrap(i))
+        for i in c2:
+            assert not s1.has_key(self.wrap(i))
+
+    @given(intlists, intlists)
     def XXXtest_update_vs_not(self, c1, c2):
         return #XXX write me!
 
@@ -233,3 +248,11 @@
         # XXX check that no additional keys
 
 
+    @given(intlists, intlists)
+    def test_issubset(self, c1, c2):
+        s1 = self.intset(c1)
+        s2 = self.intset(c1)  # starts out with the same elements as s1
+        for i in c2:
+            s2.remove(self.wrap(i))  # shrink s2 by whatever c2 names
+        assert s2.issubset(s1)
+        assert not s1.issubset(s2) or s1.equals(s2)  # s1 <= s2 must imply s1 == s2


More information about the pypy-commit mailing list