This commit is contained in:
David Maisonave
2024-08-20 20:35:06 -04:00
parent fb0760acde
commit 826e651a6e
15 changed files with 312 additions and 141 deletions

View File

@@ -1,21 +1,14 @@
# Description: This is a Stash plugin which manages duplicate files.
# By David Maisonave (aka Axter) Jul-2024 (https://www.axter.com/)
# Get the latest developers version from following link: https://github.com/David-Maisonave/Axter-Stash/tree/main/plugins/DupFileManager
# Note: To call this script outside of Stash, pass any argument.
# Example: python DupFileManager.py start
# Note: To call this script outside of Stash, pass argument --url
# Example: python DupFileManager.py --url http://localhost:9999 -a
# Research:
# Research following links to complete this plugin:
# https://github.com/WithoutPants/stash-plugin-duplicate-finder
#
# Look at options in programs from the following link:
# https://video.stackexchange.com/questions/25302/how-can-i-find-duplicate-videos-by-content
#
# Python library for parse-reparsepoint
# https://pypi.org/project/parse-reparsepoint/
# pip install parse-reparsepoint
#
# Look at stash API find_duplicate_scenes
import os, sys, time, pathlib, argparse, platform
from StashPluginHelper import StashPluginHelper
from DupFileManager_config import config # Import config from DupFileManager_config.py
@@ -24,15 +17,18 @@ parser = argparse.ArgumentParser()
# Command-line arguments used when the script is run outside of Stash.
parser.add_argument('--url', '-u', dest='stash_url', type=str, help='Add Stash URL')
parser.add_argument('--trace', '-t', dest='trace', action='store_true', help='Enables debug trace mode.')
parser.add_argument('--remove_dup', '-r', dest='remove', action='store_true', help='Remove (delete) duplicate files.')
parser.add_argument('--dryrun', '-d', dest='dryrun', action='store_true', help='Do dryrun for deleting duplicate files. No files are deleted, and only logging occurs.')
parser.add_argument('--add_dup_tag', '-a', dest='dup_tag', action='store_true', help='Set a tag to duplicate files.')
parse_args = parser.parse_args()

# Default plugin settings; values saved in the Stash plugin UI override these.
# NOTE(review): the z-prefixed keys appear chosen to control UI display order — confirm.
settings = {
    "mergeDupFilename": True,                   # merge tag/performer/studio info from duplicate file names before deletion
    "moveToTrashCan": False,                    # move files to trash can instead of permanent delete
    "whitelist": [],                            # NOTE(review): appears superseded by "zwhitelist" below — confirm before removing
    "dupFileTag": "DuplicateMarkForDeletion",   # tag applied to duplicates marked for deletion
    "dupWhiteListTag": "",                      # optional tag for duplicates located inside the whitelist
    "zxgraylist": "",                           # preferred (but still deletable) paths
    "zwhitelist": "",                           # paths whose files must NOT be deleted
    "zzblacklist": "",                          # least-preferred paths; first candidates for deletion
    "zzdebugTracing": False,                    # enable extra trace logging
    "zzdryRun": False,                          # log-only mode; no destructive actions
}
stash = StashPluginHelper(
stash_url=parse_args.stash_url,
@@ -41,10 +37,13 @@ stash = StashPluginHelper(
config=config
)
stash.Status()
stash.Log(f"\nStarting (__file__={__file__}) (stash.CALLED_AS_STASH_PLUGIN={stash.CALLED_AS_STASH_PLUGIN}) (stash.DEBUG_TRACING={stash.DEBUG_TRACING}) (stash.DRY_RUN={stash.DRY_RUN}) (stash.PLUGIN_TASK_NAME={stash.PLUGIN_TASK_NAME})************************************************")
stash.Log(f"\nStarting (__file__={__file__}) (stash.CALLED_AS_STASH_PLUGIN={stash.CALLED_AS_STASH_PLUGIN}) (stash.DEBUG_TRACING={stash.DEBUG_TRACING}) (stash.PLUGIN_TASK_NAME={stash.PLUGIN_TASK_NAME})************************************************")
stash.Trace(f"(stashPaths={stash.STASH_PATHS})")
listSeparator = stash.pluginConfig['listSeparator'] if stash.pluginConfig['listSeparator'] != "" else ','
addPrimaryDupPathToDetails = stash.pluginConfig['addPrimaryDupPathToDetails']
def realpath(path):
"""
get_symbolic_target for win
@@ -77,7 +76,7 @@ def isReparsePoint(path):
path = os.path.dirname(path)
return win32api.GetFileAttributes(path) & win32con.FILE_ATTRIBUTE_REPARSE_POINT
def mangeDupFiles(merge=False, deleteDup=False, DryRun=False):
def testReparsePointAndSymLink(merge=False, deleteDup=False):
stash.Trace(f"Debug Tracing (platform.system()={platform.system()})")
myTestPath1 = r"B:\V\V\Tip\POV - Holly Molly petite ginger anal slut - RedTube.mp4" # not a reparse point or symbolic link
myTestPath2 = r"B:\_\SpecialSet\Amateur Anal Attempts\BRCC test studio name.m2ts" # reparse point
@@ -117,22 +116,177 @@ def mangeDupFiles(merge=False, deleteDup=False, DryRun=False):
stash.Log(f"Not isSymLink '{myTestPath6}'")
return
def createTagId(tagName, tagName_descp, deleteIfExist = False):
    """Return the id of the tag named `tagName`, creating the tag when absent.

    When deleteIfExist is True an existing match is destroyed first and a
    fresh tag is created (resets the tag's description/settings).
    NOTE(review): find_tags(q=...) is a query search — presumably the first
    match is the wanted tag; confirm exact-name matching is not required.
    """
    matches = stash.find_tags(q=tagName)
    if len(matches):
        existing = matches[0]
        if not deleteIfExist:
            return existing['id']
        stash.destroy_tag(int(existing['id']))
    created = stash.create_tag({"name":tagName, "description":tagName_descp, "ignore_auto_tag": True})
    stash.Log(f"Dup-tagId={created['id']}")
    return created['id']
def setTagId(tagId, tagName, sceneDetails, PrimeDuplicateScene = ""):
    """Attach tag `tagId` to a scene; optionally record the kept (primary) duplicate's path in the scene details.

    tagId -- id of the tag to attach.
    tagName -- tag name, used to detect whether the scene already carries it.
    sceneDetails -- scene dict (needs 'id', 'tags', 'details') as returned by stash.find_scene.
    PrimeDuplicateScene -- path of the duplicate chosen to keep; when non-empty and the
        addPrimaryDupPathToDetails config is enabled, it is prepended to the scene details.
    """
    if PrimeDuplicateScene != "" and addPrimaryDupPathToDetails:
        if sceneDetails['details'].startswith(f"Primary Duplicate = {PrimeDuplicateScene}"):
            # Details already record this primary duplicate; don't duplicate the note.
            PrimeDuplicateScene = ""
        elif sceneDetails['details'] == "":  # fixed: original was missing the colon here (syntax error)
            PrimeDuplicateScene = f"Primary Duplicate = {PrimeDuplicateScene}"
        else:
            PrimeDuplicateScene = f"Primary Duplicate = {PrimeDuplicateScene}; {sceneDetails['details']}"
    for tag in sceneDetails['tags']:
        if tag['name'] == tagName:
            # Scene already tagged; at most refresh the details text.
            if PrimeDuplicateScene != "" and addPrimaryDupPathToDetails:
                stash.update_scene({'id' : sceneDetails['id'], 'details' : PrimeDuplicateScene})
            return
    if PrimeDuplicateScene == "" or not addPrimaryDupPathToDetails:
        stash.update_scene({'id' : sceneDetails['id'], 'tag_ids' : tagId})
    else:
        stash.update_scene({'id' : sceneDetails['id'], 'tag_ids' : tagId, 'details' : PrimeDuplicateScene})
def isInList(listToCk, pathToCk):
    """Return True when `pathToCk` (case-insensitively) starts with any entry of `listToCk`.

    Entries in `listToCk` are expected to be lower-case already.
    """
    lowered = pathToCk.lower()
    return any(lowered.startswith(prefix) for prefix in listToCk)
def _loadPathList(settingName):
    """Return plugin setting `settingName` (a listSeparator-delimited string) as a lower-case list; [] when empty."""
    items = [item.lower() for item in stash.pluginSettings[settingName].split(listSeparator)]
    return [] if items == [""] else items

def mangeDupFiles(merge=False, deleteDup=False, tagDuplicates=False, DryRun=False):
    """Find duplicate scenes via Stash and tag and/or delete the lesser copies.

    merge -- reserved for merging metadata from duplicates before deletion (see ToDo notes).
    deleteDup -- delete duplicate files (currently only logs the action; see ToDo notes).
    tagDuplicates -- add the configured 'dupFileTag' tag to duplicates.
    DryRun -- when True, take no destructive action and only log what would be done.
              (fixed: callers pass DryRun=..., but the previous signature did not
              accept it, which raised TypeError.)
    """
    duration_diff = 10.00  # max duration difference (seconds) passed to find_duplicate_scenes_diff
    duplicateMarkForDeletion = stash.pluginSettings['dupFileTag']
    duplicateMarkForDeletion_descp = 'Tag added to duplicate scenes so-as to tag them for deletion.'
    if duplicateMarkForDeletion == "":
        # Fall back to the documented default when the setting is blank.
        duplicateMarkForDeletion = 'DuplicateMarkForDeletion'
    stash.Log(f"duplicateMarkForDeletion = {duplicateMarkForDeletion}")
    dupTagId = createTagId(duplicateMarkForDeletion, duplicateMarkForDeletion_descp)
    stash.Trace(f"dupTagId={dupTagId} name={duplicateMarkForDeletion}")
    duplicateWhitelistTag = stash.pluginSettings['dupWhiteListTag']
    dupWhitelistTagId = None
    if duplicateWhitelistTag != "":
        stash.Log(f"duplicateWhitelistTag = {duplicateWhitelistTag}")
        duplicateWhitelistTag_descp = 'Tag added to duplicate scenes which are in the whitelist. This means there are two or more duplicates in the whitelist.'
        dupWhitelistTagId = createTagId(duplicateWhitelistTag, duplicateWhitelistTag_descp)
        stash.Trace(f"dupWhitelistTagId={dupWhitelistTagId} name={duplicateWhitelistTag}")
    graylist = _loadPathList('zxgraylist')
    stash.Log(f"graylist = {graylist}")
    whitelist = _loadPathList('zwhitelist')
    stash.Log(f"whitelist = {whitelist}")
    blacklist = _loadPathList('zzblacklist')
    stash.Log(f"blacklist = {blacklist}")
    QtyDupSet = 0
    QtyDup = 0
    QtyExactDup = 0
    QtyAlmostDup = 0
    QtyTagForDel = 0
    QtySkipForDel = 0
    stash.Log("Waiting for find_duplicate_scenes_diff to return results...")
    DupFileSets = stash.find_duplicate_scenes_diff(duration_diff=duration_diff)
    stash.Log("#########################################################################")
    stash.Log("#########################################################################")
    for DupFileSet in DupFileSets:
        stash.Trace(f"DupFileSet={DupFileSet}")
        QtyDupSet += 1
        SepLine = "---------------------------"
        DupFileToKeep = ""
        DupFileDetailList = []
        for DupFile in DupFileSet:
            QtyDup += 1
            Scene = stash.find_scene(DupFile['id'])
            # fixed: Scene is a dict (no .encode); stringify first for ascii-safe logging
            stash.Trace(f"Scene = {str(Scene).encode('ascii','ignore')}")
            DupFileDetailList = DupFileDetailList + [Scene]
            if DupFileToKeep != "":
                if DupFileToKeep['files'][0]['duration'] == Scene['files'][0]['duration']:
                    QtyExactDup += 1
                else:
                    QtyAlmostDup += 1
                    SepLine = "***************************"
                # Choose which copy to keep. NOTE(review): resolution/duration/size/path-length
                # outrank the white/black/gray list preferences — confirm this order is intended.
                if int(DupFileToKeep['files'][0]['width']) < int(Scene['files'][0]['width']) or int(DupFileToKeep['files'][0]['height']) < int(Scene['files'][0]['height']):
                    DupFileToKeep = Scene
                elif int(DupFileToKeep['files'][0]['duration']) < int(Scene['files'][0]['duration']):
                    DupFileToKeep = Scene
                elif int(DupFileToKeep['files'][0]['size']) < int(Scene['files'][0]['size']):
                    DupFileToKeep = Scene
                elif len(DupFileToKeep['files'][0]['path']) < len(Scene['files'][0]['path']):
                    DupFileToKeep = Scene
                elif isInList(whitelist, Scene['files'][0]['path']) and not isInList(whitelist, DupFileToKeep['files'][0]['path']):
                    DupFileToKeep = Scene
                elif isInList(blacklist, DupFileToKeep['files'][0]['path']) and not isInList(blacklist, Scene['files'][0]['path']):
                    DupFileToKeep = Scene
                elif isInList(graylist, Scene['files'][0]['path']) and not isInList(graylist, DupFileToKeep['files'][0]['path']):
                    DupFileToKeep = Scene
            else:
                # First scene of the set is the initial keep candidate.
                DupFileToKeep = Scene
            stash.Trace(f"KeepID={DupFileToKeep['id']}, ID={DupFile['id']} duration=({Scene['files'][0]['duration']}), Size=({Scene['files'][0]['size']}), Res=({Scene['files'][0]['width']} x {Scene['files'][0]['height']}) Name={Scene['files'][0]['path'].encode('ascii','ignore')}")
        for DupFile in DupFileDetailList:
            if DupFile['id'] != DupFileToKeep['id']:
                if isInList(whitelist, DupFile['files'][0]['path']):
                    stash.Log(f"NOT tagging duplicate, because it's in whitelist. '{DupFile['files'][0]['path'].encode('ascii','ignore')}'")
                    if dupWhitelistTagId and tagDuplicates:
                        setTagId(dupWhitelistTagId, duplicateWhitelistTag, DupFile, DupFileToKeep['files'][0]['path'])
                    QtySkipForDel += 1
                else:
                    if deleteDup:
                        if DryRun:
                            stash.Log(f"Dry run: would delete duplicate '{DupFile['files'][0]['path'].encode('ascii','ignore')}'")
                        else:
                            stash.Log(f"Deleting duplicate '{DupFile['files'][0]['path'].encode('ascii','ignore')}'")
                            # ToDo: Add logic to check if moving file to deletion folder, or doing full delete.
                            # ToDo: Add logic to check if tag merging is needed before performing deletion.
                    elif tagDuplicates:
                        if QtyTagForDel == 0:
                            stash.Log(f"Tagging duplicate {DupFile['files'][0]['path'].encode('ascii','ignore')} for deletion with tag {duplicateMarkForDeletion}.")
                        else:
                            stash.Log(f"Tagging duplicate {DupFile['files'][0]['path'].encode('ascii','ignore')} for deletion.")
                        setTagId(dupTagId, duplicateMarkForDeletion, DupFile, DupFileToKeep['files'][0]['path'])
                        QtyTagForDel += 1
        stash.Log(SepLine)
        if QtyDup > 200:
            # Safety/debug limit: stop after roughly 200 duplicates processed.
            break
    stash.Log(f"QtyDupSet={QtyDupSet}, QtyDup={QtyDup}, QtyTagForDel={QtyTagForDel}, QtySkipForDel={QtySkipForDel}, QtyExactDup={QtyExactDup}, QtyAlmostDup={QtyAlmostDup}")
def testSetDupTagOnScene(sceneId, tagId=None):
    """Debug helper: add the duplicate-marker tag to one scene, preserving existing tags.

    sceneId -- id of the scene to update.
    tagId -- id of the tag to add; when None, the configured dupFileTag is looked
             up (and created if missing) via createTagId.
             (fixed: the original referenced `dupTagId`, which is local to
             mangeDupFiles and undefined at module scope, raising NameError.)
    """
    if tagId is None:
        tagName = stash.pluginSettings['dupFileTag'] or 'DuplicateMarkForDeletion'
        tagId = createTagId(tagName, 'Tag added to duplicate scenes so-as to tag them for deletion.')
    scene = stash.find_scene(sceneId)
    stash.Log(f"scene={scene}")
    stash.Log(f"scene tags={scene['tags']}")
    # update_scene replaces the tag list, so carry over the scene's current tags.
    tag_ids = [tagId]
    for tag in scene['tags']:
        tag_ids = tag_ids + [tag['id']]
    stash.Log(f"tag_ids={tag_ids}")
    stash.update_scene({'id' : scene['id'], 'tag_ids' : tag_ids})
# Dispatch: select the task from the Stash plugin task name, falling back to
# command-line arguments when the script is run outside of Stash.
# (fixed: removed duplicated unconditional calls and a stale log line that
# referenced the undefined name PLUGIN_ARGS_MODE — leftovers from a bad merge.)
if stash.PLUGIN_TASK_NAME == "merge_dup_filename_task":
    mangeDupFiles(merge=True)
    stash.Trace(f"{stash.PLUGIN_TASK_NAME} EXIT")
elif stash.PLUGIN_TASK_NAME == "delete_duplicates":
    mangeDupFiles(deleteDup=True)
    stash.Trace(f"{stash.PLUGIN_TASK_NAME} EXIT")
elif stash.PLUGIN_TASK_NAME == "dryrun_delete_duplicates":
    mangeDupFiles(deleteDup=True, DryRun=True)
    stash.Trace(f"{stash.PLUGIN_TASK_NAME} EXIT")
elif parse_args.dup_tag:
    mangeDupFiles(tagDuplicates=True)
    stash.Trace(f"Tag duplicate EXIT")
elif parse_args.remove:
    mangeDupFiles(deleteDup=True, DryRun=parse_args.dryrun)
    stash.Trace(f"Delete duplicate (DryRun={parse_args.dryrun}) EXIT")
elif parse_args.dryrun:
    mangeDupFiles(deleteDup=True, DryRun=True)
    stash.Trace(f"Dryrun delete duplicate EXIT")
else:
    stash.Log(f"Nothing to do!!! (PLUGIN_TASK_NAME={stash.PLUGIN_TASK_NAME})")

stash.Trace("\n*********************************\nEXITING ***********************\n*********************************")

View File

@@ -4,38 +4,54 @@ version: 0.1.0
url: https://github.com/David-Maisonave/Axter-Stash/tree/main/plugins/DupFileManager
settings:
mergeDupFilename:
displayName: Before deletion, merge potential source in the duplicate file names for tag names, performers, and studios.
description: Enable to
displayName: Merge Duplicate Tags
description: Before deletion, merge potential source in the duplicate file names for tag names, performers, and studios.
type: BOOLEAN
moveToTrashCan:
displayName: Trash Can
description: Enable to move files to trash can instead of permanently delete file.
type: BOOLEAN
whitelist:
dupFileTag:
displayName: Duplicate File Tag Name
description: (Default = DuplicateMarkForDeletion) Tag used to tag duplicates with lower resolution, duration, and file name length.
type: STRING
dupWhiteListTag:
displayName: Duplicate Whitelist Tag Name
description: If populated, a tag name used to tag duplicates in the whitelist. E.g. DuplicateWhitelistFile
type: STRING
zwhitelist:
displayName: White List
description: A comma separated list of preferential paths to determine which duplicate should be the primary. Listed in order of preference.
description: A comma separated list of paths NOT to be deleted. E.g. C:\Favorite\,E:\MustKeep\
type: STRING
zxgraylist:
displayName: Gray List
description: List of preferential paths to determine which duplicate should be the primary. E.g. C:\2nd_Favorite\,H:\ShouldKeep\
type: STRING
zzblacklist:
displayName: Black List
description: List of LEAST preferential paths to determine primary candidates for deletion. E.g. C:\Downloads\,F:\DeleteMeFirst\
type: STRING
zzdebugTracing:
displayName: Debug Tracing
description: (Default=false) [***For Advanced Users***] Enable debug tracing. When enabled, additional tracing logging is added to Stash\plugins\DupFileManager\DupFileManager.log
type: BOOLEAN
zzdryRun:
displayName: Dry Run
description: Enable to run script in [Dry Run] mode. In this mode, Stash does NOT call meta_scan, and only logs the action it would have taken.
type: BOOLEAN
exec:
- python
- "{pluginDir}/DupFileManager.py"
interface: raw
tasks:
- name: Merge Duplicate Filename
description: Merge duplicate filename source tag names, performers, and studios.
- name: Tag Duplicate Filename
description: Set tag DuplicateMarkForDeletion to the duplicate with lower resolution, duration, file name length, and/or black list path.
defaultArgs:
mode: merge_dup_filename_task
- name: Delete Duplicates
description: Delete duplicate files
defaultArgs:
mode: delete_duplicates
- name: Merge Duplicate Filename
description: Merge duplicate filename source tag names, performers, and studios.
defaultArgs:
mode: merge_dup_filename_task
- name: Dry Run Delete Duplicates
description: Only perform a dry run (logging only) of duplicate file deletions. The Dry Run setting is ignored when running this task.
defaultArgs:

View File

@@ -2,9 +2,11 @@
# By David Maisonave (aka Axter) Jul-2024 (https://www.axter.com/)
# Get the latest developers version from following link: https://github.com/David-Maisonave/Axter-Stash/tree/main/plugins/DupFileManager
config = {
# Define black list to determine which duplicates should be deleted first.
"blacklist_paths": [], #Example: "blacklist_paths": ['C:\\SomeMediaPath\\subpath', 'E:\\YetAnotherPath\\subpath', 'E:\\YetAnotherPath\\secondSubPath']
# Character used to seperate items on the whitelist, blacklist, and graylist
"listSeparator" : ",",
# If enabled, adds the primary duplicate path to the scene detail.
"addPrimaryDupPathToDetails" : True,
# If enabled, ignore reparsepoints. For Windows NT drives only.
"ignoreReparsepoints" : True,
# If enabled, ignore symbolic links.

View File

@@ -1,6 +1,6 @@
from stashapi.stashapp import StashInterface
from logging.handlers import RotatingFileHandler
import inspect, sys, os, pathlib, logging, json
import re, inspect, sys, os, pathlib, logging, json
import concurrent.futures
from stashapi.stash_types import PhashDistance
import __main__
@@ -30,7 +30,6 @@ class StashPluginHelper(StashInterface):
PLUGINS_PATH = None
pluginSettings = None
pluginConfig = None
STASH_INTERFACE_INIT = False
STASH_URL = None
STASH_CONFIGURATION = None
JSON_INPUT = None
@@ -62,6 +61,7 @@ class StashPluginHelper(StashInterface):
pluginLog = None
logLinePreviousHits = []
thredPool = None
STASH_INTERFACE_INIT = False
# Prefix message value
LEV_TRACE = "TRACE: "
@@ -106,7 +106,7 @@ class StashPluginHelper(StashInterface):
if logToNormSet: self.log_to_norm = logToNormSet
if stash_url and len(stash_url): self.STASH_URL = stash_url
self.MAIN_SCRIPT_NAME = mainScriptName if mainScriptName != "" else __main__.__file__
self.PLUGIN_ID = pluginID if pluginID != "" else pathlib.Path(self.MAIN_SCRIPT_NAME).stem.lower()
self.PLUGIN_ID = pluginID if pluginID != "" else pathlib.Path(self.MAIN_SCRIPT_NAME).stem
# print(f"self.MAIN_SCRIPT_NAME={self.MAIN_SCRIPT_NAME}, self.PLUGIN_ID={self.PLUGIN_ID}", file=sys.stderr)
self.LOG_FILE_NAME = logFilePath if logFilePath != "" else f"{pathlib.Path(self.MAIN_SCRIPT_NAME).resolve().parent}{os.sep}{pathlib.Path(self.MAIN_SCRIPT_NAME).stem}.log"
self.LOG_FILE_DIR = pathlib.Path(self.LOG_FILE_NAME).resolve().parent
@@ -355,24 +355,20 @@ class StashPluginHelper(StashInterface):
def rename_generated_files(self):
    """Run Stash's MigrateHashNaming mutation, renaming generated files to the current hash-naming scheme."""
    return self.call_GQL("mutation MigrateHashNaming {migrateHashNaming}")
# def find_duplicate_scenes(self, distance: PhashDistance=PhashDistance.EXACT, fragment=None):
# query = """
# query FindDuplicateScenes($distance: Int) {
# findDuplicateScenes(distance: $distance) {
# ...SceneSlim
# }
# }
# """
# if fragment:
# query = re.sub(r'\.\.\.SceneSlim', fragment, query)
# else:
# query = """
# query FindDuplicateScenes($distance: Int) {
# findDuplicateScenes(distance: $distance)
# }
# """
# variables = {
# "distance": distance
# }
# result = self.call_GQL(query, variables)
# return result['findDuplicateScenes']
def find_duplicate_scenes_diff(self, distance: PhashDistance=PhashDistance.EXACT, fragment='id', duration_diff: float=10.00 ):
    """Return duplicate scenes from Stash's findDuplicateScenes GraphQL query.

    distance -- phash match distance (PhashDistance.EXACT by default).
    fragment -- GraphQL field selection substituted in place of ...SceneSlim;
                when falsy, a minimal SceneSlim fragment selecting only 'id'
                is appended to the query instead.
    duration_diff -- max allowed duration difference between duplicates
                     (presumably in seconds — confirm against the Stash API).
    """
    query = """
    query FindDuplicateScenes($distance: Int, $duration_diff: Float) {
        findDuplicateScenes(distance: $distance, duration_diff: $duration_diff) {
            ...SceneSlim
        }
    }
    """
    if fragment:
        # Inline the caller-supplied selection where ...SceneSlim appears.
        query = re.sub(r'\.\.\.SceneSlim', fragment, query)
    else:
        query += "fragment SceneSlim on Scene { id }"
    variables = { "distance": distance, "duration_diff": duration_diff }
    result = self.call_GQL(query, variables)
    return result['findDuplicateScenes']

View File

@@ -1,3 +1,3 @@
stashapp-tools >= 0.2.49
stashapp-tools >= 0.2.50
pyYAML
watchdog