From f0a0371ff1679e88dbc6c0fdad27586c82cb1c37 Mon Sep 17 00:00:00 2001 From: John Pangas Date: Wed, 22 Mar 2023 17:31:37 +0300 Subject: [PATCH 1/2] Skip bugs filed by editbugs and Softvision users when labeling spam --- bugbug/models/spambug.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/bugbug/models/spambug.py b/bugbug/models/spambug.py index 4b2f2d12f9..92b4feaf15 100644 --- a/bugbug/models/spambug.py +++ b/bugbug/models/spambug.py @@ -7,6 +7,7 @@ import xgboost from imblearn.over_sampling import BorderlineSMOTE +from requests import HTTPError from sklearn.compose import ColumnTransformer from sklearn.feature_extraction import DictVectorizer from sklearn.pipeline import Pipeline @@ -88,9 +89,20 @@ def get_labels(self): for bug_data in bugzilla.get_bugs(include_invalid=True): bug_id = bug_data["id"] + creator = bug_data["creator"] + + try: + userswitheditbugs = bugzilla.get_groups_users( + ["editbugs", "editbugs-team"] + ) + except HTTPError: + userswitheditbugs = set() + + if creator in userswitheditbugs: + continue # Skip bugs filed by Mozillians, since we are sure they are not spam. - if "@mozilla" in bug_data["creator"]: + if "@mozilla" in creator or "@softvision" in creator: continue # A bug that was moved out of 'Invalid Bugs' is definitely a legitimate bug. 
From a523eff198b03e480d0e0cac9d9b01068461fc3a Mon Sep 17 00:00:00 2001 From: John P Date: Thu, 1 Aug 2024 16:44:40 +0300 Subject: [PATCH 2/2] Collect users with edit_bugs --- bugbug/models/spambug.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/bugbug/models/spambug.py b/bugbug/models/spambug.py index 20722c9b51..f1ea819f10 100644 --- a/bugbug/models/spambug.py +++ b/bugbug/models/spambug.py @@ -94,22 +94,23 @@ def __init__(self, lemmatization=False): def get_labels(self): classes = {} + try: + users_with_edit_bugs = bugzilla.get_groups_users( + ["editbugs", "editbugs-team"] + ) + except HTTPError: + users_with_edit_bugs = set() + for bug_data in bugzilla.get_bugs(include_invalid=True): bug_id = bug_data["id"] creator = bug_data["creator"] - try: - userswitheditbugs = bugzilla.get_groups_users( - ["editbugs", "editbugs-team"] - ) - except HTTPError: - userswitheditbugs = set() - - if creator in userswitheditbugs: - continue - # Skip bugs filed by Mozillians, since we are sure they are not spam. - if "@mozilla" in creator or "@softvision" in creator: + if ( + "@mozilla" in creator + or "softvision" in creator + or creator in users_with_edit_bugs + ): continue # A bug that was moved out of 'Invalid Bugs' is definitely a legitimate bug.