From 24b0712fc1706b74320c7fa54ba31f564d67c7c2 Mon Sep 17 00:00:00 2001 From: Serafeim Papastefanos Date: Mon, 10 Mar 2014 17:17:57 +0200 Subject: [PATCH] Use unidecode to improve image filenames (fix #136) Image filenames containing non-ASCII characters would be translated to a series of underscores (____.png). To fix this, we use the unidecode library (which we also add to the required packages for Wagtail), which translates each Unicode character to an ASCII equivalent. For more info on how unidecode works, please see @Evgeny's answer to this question: http://stackoverflow.com/questions/702337/how-to-make-django-slugify-work-properly-with-unicode-strings --- setup.py | 1 + wagtail/wagtailimages/models.py | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b33576f540..c4b030c73f 100644 --- a/setup.py +++ b/setup.py @@ -48,6 +48,7 @@ setup( "Pillow>=2.3.0", "beautifulsoup4>=4.3.2", "lxml>=3.3.0", + 'Unidecode>=0.04.14', "BeautifulSoup==3.2.1", # django-compressor gets confused if we have lxml but not BS3 installed ], zip_safe=False, diff --git a/wagtail/wagtailimages/models.py b/wagtail/wagtailimages/models.py index d30f969ad5..61082c2cbc 100644 --- a/wagtail/wagtailimages/models.py +++ b/wagtail/wagtailimages/models.py @@ -14,6 +14,8 @@ from django.utils.html import escape from django.conf import settings from django.utils.translation import ugettext_lazy as _ +from unidecode import unidecode + from wagtail.wagtailadmin.taggable import TagSearchable from wagtail.wagtailimages import image_ops @@ -25,8 +27,9 @@ class AbstractImage(models.Model, TagSearchable): folder_name = 'original_images' filename = self.file.field.storage.get_valid_name(filename) + # do a unidecode in the filename and then # replace non-ascii characters in filename with _ , to sidestep issues with filesystem encoding - filename = "".join((i if ord(i) < 128 else '_') for i in filename) + filename = "".join((i if ord(i) < 128 else '_') for i in 
unidecode(filename)) while len(os.path.join(folder_name, filename)) >= 95: prefix, dot, extension = filename.rpartition('.')