Added task to clear duplicate tags

Added logic to index grey and black list.
Added task to clear duplicate tags.
Appended a <BaseDup> marker to the scene details so that, when clearing duplicate tags, the start and end of the added remarks can be located and removed easily.
This commit is contained in:
David Maisonave
2024-08-29 13:49:00 -04:00
parent be62ae57d7
commit b05037b2e3
5 changed files with 109 additions and 26 deletions

View File

@@ -11,6 +11,7 @@ parser = argparse.ArgumentParser()
parser.add_argument('--url', '-u', dest='stash_url', type=str, help='Add Stash URL')
parser.add_argument('--trace', '-t', dest='trace', action='store_true', help='Enables debug trace mode.')
parser.add_argument('--add_dup_tag', '-a', dest='dup_tag', action='store_true', help='Set a tag to duplicate files.')
parser.add_argument('--clear_dup_tag', '-c', dest='clear_tag', action='store_true', help='Clear duplicates of duplicate tags.')
parser.add_argument('--del_tag_dup', '-d', dest='del_tag', action='store_true', help='Only delete scenes having DuplicateMarkForDeletion tag.')
parser.add_argument('--remove_dup', '-r', dest='remove', action='store_true', help='Remove (delete) duplicate files.')
parse_args = parser.parse_args()
@@ -60,6 +61,8 @@ swapLongLength = stash.Setting('zSwapLongLength')
significantTimeDiff = stash.Setting('significantTimeDiff')
toRecycleBeforeSwap = stash.Setting('toRecycleBeforeSwap')
cleanAfterDel = stash.Setting('zCleanAfterDel')
favorLongerFileName = float(stash.Setting('favorLongerFileName'))
favorLargerFileSize = float(stash.Setting('favorLargerFileSize'))
duration_diff = float(stash.Setting('duration_diff'))
if duration_diff > 10:
duration_diff = 10
@@ -182,13 +185,16 @@ def createTagId(tagName, tagName_descp, deleteIfExist = False):
stash.Log(f"Dup-tagId={tagId['id']}")
return tagId['id']
detailPrefix = "BaseDup="
detailPostfix = "<BaseDup>\n"
def setTagId(tagId, tagName, sceneDetails, DupFileToKeep):
details = ""
ORG_DATA_DICT = {'id' : sceneDetails['id']}
dataDict = ORG_DATA_DICT.copy()
doAddTag = True
if addPrimaryDupPathToDetails:
BaseDupStr = f"BaseDup={DupFileToKeep['files'][0]['path']}\n{stash.STASH_URL}/scenes/{DupFileToKeep['id']}\n"
BaseDupStr = f"{detailPrefix}{DupFileToKeep['files'][0]['path']}\n{stash.STASH_URL}/scenes/{DupFileToKeep['id']}\n{detailPostfix}"
if sceneDetails['details'] == "":
details = BaseDupStr
elif not sceneDetails['details'].startswith(BaseDupStr):
@@ -215,6 +221,20 @@ def isInList(listToCk, pathToCk):
return True
return False
NOT_IN_LIST = 65535  # Sentinel returned by indexInList when no entry matches.
def indexInList(listToCk, pathToCk):
    """Return the position of the list entry that best matches pathToCk.

    An entry matches when pathToCk starts with it (compared case-insensitively).
    When several entries match, the longest one wins, so a more specific
    directory is preferred over its parent; on equal length the earliest
    entry is kept.

    Args:
        listToCk: Sequence of path-prefix strings.
        pathToCk: Path string to look up.

    Returns:
        The zero-based index of the best matching entry, or NOT_IN_LIST
        when no entry is a prefix of pathToCk.
    """
    pathToCk = pathToCk.lower()
    lenItemMatch = 0
    returnValue = NOT_IN_LIST
    for index, item in enumerate(listToCk):
        # Lower-case the entry as well: the path was lower-cased above, so an
        # entry containing upper-case characters could otherwise never match.
        if pathToCk.startswith(item.lower()) and len(item) > lenItemMatch:
            lenItemMatch = len(item)
            returnValue = index
    return returnValue
def hasSameDir(path1, path2):
if pathlib.Path(path1).resolve().parent == pathlib.Path(path2).resolve().parent:
return True
@@ -258,6 +278,21 @@ def isSwapCandidate(DupFileToKeep, DupFile):
return True
return False
def isWorseKeepCandidate(DupFileToKeep, Scene):
    """Tell whether Scene ranks below DupFileToKeep according to the path lists.

    Both arguments are scene dicts; only the path of each scene's first file
    (['files'][0]['path']) is examined.

    Returns True when any of the following holds, otherwise False:
      * DupFileToKeep is in the whitelist and Scene is not.
      * DupFileToKeep is in the graylist and Scene is not.
      * Scene is in the blacklist and DupFileToKeep is not.
      * Both are in the graylist and DupFileToKeep has the lower graylist index.
      * Both are in the blacklist and DupFileToKeep has the lower blacklist index.
    """
    keepPath = DupFileToKeep['files'][0]['path']
    scenePath = Scene['files'][0]['path']

    # Membership-only comparisons: one scene is listed, the other is not.
    if not isInList(whitelist, scenePath) and isInList(whitelist, keepPath):
        return True
    if not isInList(graylist, scenePath) and isInList(graylist, keepPath):
        return True
    if not isInList(blacklist, keepPath) and isInList(blacklist, scenePath):
        return True

    # Both scenes are in the same list: compare their positions in that list.
    if (
        isInList(graylist, scenePath)
        and isInList(graylist, keepPath)
        and indexInList(graylist, keepPath) < indexInList(graylist, scenePath)
    ):
        return True
    if (
        isInList(blacklist, keepPath)
        and isInList(blacklist, scenePath)
        and indexInList(blacklist, keepPath) < indexInList(blacklist, scenePath)
    ):
        return True
    return False
def mangeDupFiles(merge=False, deleteDup=False, tagDuplicates=False):
duplicateMarkForDeletion_descp = 'Tag added to duplicate scenes so-as to tag them for deletion.'
stash.Trace(f"duplicateMarkForDeletion = {duplicateMarkForDeletion}")
@@ -312,18 +347,37 @@ def mangeDupFiles(merge=False, deleteDup=False, tagDuplicates=False):
if significantLessTime(int(DupFileToKeep['files'][0]['duration']), int(Scene['files'][0]['duration'])):
QtyRealTimeDiff += 1
if int(DupFileToKeep['files'][0]['width']) < int(Scene['files'][0]['width']) or int(DupFileToKeep['files'][0]['height']) < int(Scene['files'][0]['height']):
stash.Trace(f"Replacing {DupFileToKeep['files'][0]['path']} with {Scene['files'][0]['path']} for candidate to keep. Reason=resolution: {DupFileToKeep['files'][0]['width']}x{DupFileToKeep['files'][0]['height']} < {Scene['files'][0]['width']}x{Scene['files'][0]['height']}")
DupFileToKeep = Scene
elif int(DupFileToKeep['files'][0]['duration']) < int(Scene['files'][0]['duration']):
stash.Trace(f"Replacing {DupFileToKeep['files'][0]['path']} with {Scene['files'][0]['path']} for candidate to keep. Reason=duration: {DupFileToKeep['files'][0]['duration']} < {Scene['files'][0]['duration']}")
DupFileToKeep = Scene
elif isInList(whitelist, Scene['files'][0]['path']) and not isInList(whitelist, DupFileToKeep['files'][0]['path']):
stash.Trace(f"Replacing {DupFileToKeep['files'][0]['path']} with {Scene['files'][0]['path']} for candidate to keep. Reason=not whitelist vs whitelist")
DupFileToKeep = Scene
elif isInList(blacklist, DupFileToKeep['files'][0]['path']) and not isInList(blacklist, Scene['files'][0]['path']):
stash.Trace(f"Replacing {DupFileToKeep['files'][0]['path']} with {Scene['files'][0]['path']} for candidate to keep. Reason=blacklist vs not blacklist")
DupFileToKeep = Scene
elif isInList(blacklist, DupFileToKeep['files'][0]['path']) and isInList(blacklist, Scene['files'][0]['path']) and indexInList(blacklist, DupFileToKeep['files'][0]['path']) > indexInList(blacklist, Scene['files'][0]['path']):
stash.Trace(f"Replacing {DupFileToKeep['files'][0]['path']} with {Scene['files'][0]['path']} for candidate to keep. Reason=blacklist-index {indexInList(blacklist, DupFileToKeep['files'][0]['path'])} > {indexInList(blacklist, Scene['files'][0]['path'])}")
DupFileToKeep = Scene
elif isInList(graylist, Scene['files'][0]['path']) and not isInList(graylist, DupFileToKeep['files'][0]['path']):
stash.Trace(f"Replacing {DupFileToKeep['files'][0]['path']} with {Scene['files'][0]['path']} for candidate to keep. Reason=not graylist vs graylist")
DupFileToKeep = Scene
elif len(DupFileToKeep['files'][0]['path']) < len(Scene['files'][0]['path']):
elif isInList(graylist, Scene['files'][0]['path']) and isInList(graylist, DupFileToKeep['files'][0]['path']) and indexInList(graylist, DupFileToKeep['files'][0]['path']) > indexInList(graylist, Scene['files'][0]['path']):
stash.Trace(f"Replacing {DupFileToKeep['files'][0]['path']} with {Scene['files'][0]['path']} for candidate to keep. Reason=graylist-index {indexInList(graylist, DupFileToKeep['files'][0]['path'])} > {indexInList(graylist, Scene['files'][0]['path'])}")
DupFileToKeep = Scene
elif int(DupFileToKeep['files'][0]['size']) < int(Scene['files'][0]['size']):
elif favorLongerFileName and len(DupFileToKeep['files'][0]['path']) < len(Scene['files'][0]['path']) and not isWorseKeepCandidate(DupFileToKeep, Scene):
stash.Trace(f"Replacing {DupFileToKeep['files'][0]['path']} with {Scene['files'][0]['path']} for candidate to keep. Reason=path-len {len(DupFileToKeep['files'][0]['path'])} < {len(Scene['files'][0]['path'])}")
DupFileToKeep = Scene
elif favorLargerFileSize and int(DupFileToKeep['files'][0]['size']) < int(Scene['files'][0]['size']) and not isWorseKeepCandidate(DupFileToKeep, Scene):
stash.Trace(f"Replacing {DupFileToKeep['files'][0]['path']} with {Scene['files'][0]['path']} for candidate to keep. Reason=size {DupFileToKeep['files'][0]['size']} < {Scene['files'][0]['size']}")
DupFileToKeep = Scene
elif not favorLongerFileName and len(DupFileToKeep['files'][0]['path']) > len(Scene['files'][0]['path']) and not isWorseKeepCandidate(DupFileToKeep, Scene):
stash.Trace(f"Replacing {DupFileToKeep['files'][0]['path']} with {Scene['files'][0]['path']} for candidate to keep. Reason=path-len {len(DupFileToKeep['files'][0]['path'])} > {len(Scene['files'][0]['path'])}")
DupFileToKeep = Scene
elif not favorLargerFileSize and int(DupFileToKeep['files'][0]['size']) > int(Scene['files'][0]['size']) and not isWorseKeepCandidate(DupFileToKeep, Scene):
stash.Trace(f"Replacing {DupFileToKeep['files'][0]['path']} with {Scene['files'][0]['path']} for candidate to keep. Reason=size {DupFileToKeep['files'][0]['size']} > {Scene['files'][0]['size']}")
DupFileToKeep = Scene
else:
DupFileToKeep = Scene
@@ -384,7 +438,7 @@ def mangeDupFiles(merge=False, deleteDup=False, tagDuplicates=False):
stash.metadata_clean_generated()
stash.optimise_database()
def deleteTagggedDuplicates():
def manageTaggedDuplicates(deleteFiles=False):
tagId = stash.find_tags(q=duplicateMarkForDeletion)
if len(tagId) > 0 and 'id' in tagId[0]:
tagId = tagId[0]['id']
@@ -409,19 +463,34 @@ def deleteTagggedDuplicates():
QtyFailedQuery += 1
continue
# stash.Log(f"scene={scene}")
DupFileName = scene['files'][0]['path']
DupFileNameOnly = pathlib.Path(DupFileName).stem
stash.Warn(f"Deleting duplicate '{DupFileName}'", toAscii=True, printTo=LOG_STASH_N_PLUGIN)
if alternateTrashCanPath != "":
destPath = f"{alternateTrashCanPath }{os.sep}{DupFileNameOnly}"
if os.path.isfile(destPath):
destPath = f"{alternateTrashCanPath }{os.sep}_{time.time()}_{DupFileNameOnly}"
shutil.move(DupFileName, destPath)
elif moveToTrashCan:
sendToTrash(DupFileName)
result = stash.destroy_scene(scene['id'], delete_file=True)
stash.Trace(f"destroy_scene result={result} for file {DupFileName}", toAscii=True)
QtyDeleted += 1
if deleteFiles:
DupFileName = scene['files'][0]['path']
DupFileNameOnly = pathlib.Path(DupFileName).stem
stash.Warn(f"Deleting duplicate '{DupFileName}'", toAscii=True, printTo=LOG_STASH_N_PLUGIN)
if alternateTrashCanPath != "":
destPath = f"{alternateTrashCanPath }{os.sep}{DupFileNameOnly}"
if os.path.isfile(destPath):
destPath = f"{alternateTrashCanPath }{os.sep}_{time.time()}_{DupFileNameOnly}"
shutil.move(DupFileName, destPath)
elif moveToTrashCan:
sendToTrash(DupFileName)
result = stash.destroy_scene(scene['id'], delete_file=True)
stash.Trace(f"destroy_scene result={result} for file {DupFileName}", toAscii=True)
QtyDeleted += 1
else:
tags = [int(item['id']) for item in scene["tags"] if item['id'] != tagId]
stash.TraceOnce(f"tagId={tagId}, len={len(tags)}, tags = {tags}")
dataDict = {'id' : scene['id']}
if addPrimaryDupPathToDetails:
sceneDetails = scene['details']
if sceneDetails.find(detailPrefix) == 0 and sceneDetails.find(detailPostfix) > 1:
Pos1 = sceneDetails.find(detailPrefix)
Pos2 = sceneDetails.find(detailPostfix)
sceneDetails = sceneDetails[0:Pos1] + sceneDetails[Pos2 + len(detailPostfix):]
dataDict.update({'details' : sceneDetails})
dataDict.update({'tag_ids' : tags})
stash.Log(f"Updating scene with {dataDict}")
stash.update_scene(dataDict)
stash.Log(f"QtyDup={QtyDup}, QtyDeleted={QtyDeleted}, QtyFailedQuery={QtyFailedQuery}", printTo=LOG_STASH_N_PLUGIN)
return
@@ -439,7 +508,10 @@ if stash.PLUGIN_TASK_NAME == "tag_duplicates_task":
mangeDupFiles(tagDuplicates=True, merge=mergeDupFilename)
stash.Trace(f"{stash.PLUGIN_TASK_NAME} EXIT")
elif stash.PLUGIN_TASK_NAME == "delete_tagged_duplicates_task":
deleteTagggedDuplicates()
manageTaggedDuplicates(deleteFiles=True)
stash.Trace(f"{stash.PLUGIN_TASK_NAME} EXIT")
elif stash.PLUGIN_TASK_NAME == "clear_duplicate_tags_task":
manageTaggedDuplicates(deleteFiles=False)
stash.Trace(f"{stash.PLUGIN_TASK_NAME} EXIT")
elif stash.PLUGIN_TASK_NAME == "delete_duplicates_task":
mangeDupFiles(deleteDup=True, merge=mergeDupFilename)
@@ -448,8 +520,11 @@ elif parse_args.dup_tag:
mangeDupFiles(tagDuplicates=True, merge=mergeDupFilename)
stash.Trace(f"Tag duplicate EXIT")
elif parse_args.del_tag:
deleteTagggedDuplicates()
stash.Trace(f"Delete Tagged duplicates EXIT")
manageTaggedDuplicates(deleteFiles=True)
stash.Trace(f"Delete tagged duplicates EXIT")
elif parse_args.clear_tag:
manageTaggedDuplicates(deleteFiles=False)
stash.Trace(f"Clear duplicate tags EXIT")
elif parse_args.remove:
mangeDupFiles(deleteDup=True, merge=mergeDupFilename)
stash.Trace(f"Delete duplicate EXIT")

View File

@@ -1,6 +1,6 @@
name: DupFileManager
description: Manages duplicate files.
version: 0.1.2
version: 0.1.3
url: https://github.com/David-Maisonave/Axter-Stash/tree/main/plugins/DupFileManager
settings:
mergeDupFilename:
@@ -37,11 +37,11 @@ settings:
type: STRING
zxGraylist:
displayName: Gray List
description: List of preferential paths to determine which duplicate should be the primary. E.g. C:\2nd_Favorite\,H:\ShouldKeep\
description: Preferential paths to determine which duplicate should be kept. E.g. C:\2nd_Fav,C:\3rd_Fav,C:\4th_Fav,H:\ShouldKeep
type: STRING
zyBlacklist:
displayName: Black List
description: List of LEAST preferential paths to determine primary candidates for deletion. E.g. C:\Downloads\,F:\DeleteMeFirst\
description: Least preferential paths; Determine primary deletion candidates. E.g. C:\Downloads,C:\DelMe-3rd,C:\DelMe-2nd,C:\DeleteMeFirst
type: STRING
zyMaxDupToProcess:
displayName: Max Dup Process
@@ -49,7 +49,7 @@ settings:
type: NUMBER
zzdebugTracing:
displayName: Debug Tracing
description: (Default=false) [***For Advanced Users***] Enable debug tracing. When enabled, additional tracing logging is added to Stash\plugins\DupFileManager\DupFileManager.log
description: Enable debug tracing so-as to add additional debug logging in Stash\plugins\DupFileManager\DupFileManager.log
type: BOOLEAN
exec:
- python
@@ -68,3 +68,7 @@ tasks:
description: Delete duplicate scenes. Performs deletion without first tagging.
defaultArgs:
mode: delete_duplicates_task
- name: Clear Duplicate Tags
description: Removes DuplicateMarkForDeletion tag from all files.
defaultArgs:
mode: clear_duplicate_tags_task

View File

@@ -18,6 +18,10 @@ config = {
"DupFileTag" : "DuplicateMarkForDeletion",
# Tag name used to tag duplicates in the whitelist. E.g. DuplicateWhitelistFile
"DupWhiteListTag" : "DuplicateWhitelistFile",
# If enabled, favor longer file name over shorter. If disabled, favor shorter file name.
"favorLongerFileName" : True,
# If enabled, favor larger file size over smaller. If disabled, favor smaller file size.
"favorLargerFileSize" : True,
# The following fields are ONLY used when running DupFileManager in script mode
"endpoint_Scheme" : "http", # Define endpoint to use when contacting the Stash server

View File

@@ -1,4 +1,4 @@
# DupFileManager: Ver 0.1.2 (By David Maisonave)
# DupFileManager: Ver 0.1.3 (By David Maisonave)
DupFileManager is a [Stash](https://github.com/stashapp/stash) plugin which manages duplicate files in the Stash system.

View File

@@ -37,7 +37,7 @@ settings:
type: STRING
zzdebugTracing:
displayName: Debug Tracing
description: (Default=false) [***For Advanced Users***] Enable debug tracing. When enabled, additional tracing logging is added to Stash\plugins\RenameFile\renamefile.log
description: Enable debug tracing so-as to add additional debug logging in Stash\plugins\RenameFile\renamefile.log
type: BOOLEAN
zzdryRun:
displayName: Dry Run