From 1b2c8eb3b653ecc05e332be16a79e740cd679a9f Mon Sep 17 00:00:00 2001
From: Aaron LI
Date: Tue, 27 Aug 2019 15:54:43 +0800
Subject: cli/unzip-gbk: Rewrite in shell and use bsdtar/7z

* Rewrite in shell instead of using the deprecated Python 2.
* Use bsdtar/7z to extract zip to preserve the filename encoding;
  these tools also support encrypted zip archives.
* Use iconv to convert the filename encoding.
---
 cli/unzip-gbk.py | 26 ----------------
 cli/unzip-gbk.sh | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 89 insertions(+), 26 deletions(-)
 delete mode 100755 cli/unzip-gbk.py
 create mode 100755 cli/unzip-gbk.sh

diff --git a/cli/unzip-gbk.py b/cli/unzip-gbk.py
deleted file mode 100755
index 423e10f..0000000
--- a/cli/unzip-gbk.py
+++ /dev/null
@@ -1,26 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# unzip-gbk.py
-#
-# http://note.ninehills.info/linux-gbk.html
-#
-
-import os
-import sys
-import zipfile
-
-print "Processing File " + sys.argv[1]
-
-file=zipfile.ZipFile(sys.argv[1],"r");
-for name in file.namelist():
-    utf8name=name.decode('gbk')
-    print "Extracting " + utf8name
-    pathname = os.path.dirname(utf8name)
-    if not os.path.exists(pathname) and pathname!= "":
-        os.makedirs(pathname)
-    data = file.read(name)
-    if not os.path.exists(utf8name):
-        fo = open(utf8name, "w")
-        fo.write(data)
-        fo.close
-file.close()
diff --git a/cli/unzip-gbk.sh b/cli/unzip-gbk.sh
new file mode 100755
index 0000000..53c7b57
--- /dev/null
+++ b/cli/unzip-gbk.sh
@@ -0,0 +1,89 @@
+#!/bin/sh
+#
+# Extract a zip archive and fix Chinese filenames.
+#
+# The archive is unpacked into a temporary directory, names are converted
+# from GBK to UTF-8 there, and the result is moved into the current
+# directory.  Exit status is non-zero if any step reported an error.
+#
+# Credit: https://superuser.com/a/872616
+#
+
+# has CMD: succeed if the shell can find a command named CMD.
+has() {
+    type "$1" >/dev/null 2>&1
+}
+
+# extract ARCHIVE: unpack ARCHIVE into the current directory.
+# Returns the extractor's status, or 1 if no extractor is installed.
+extract() {
+    if has bsdtar; then
+        # bsdtar provided by libarchive
+        bsdtar -xvf "$1"
+    elif has 7z; then
+        # 7z provided by p7zip
+        env LC_ALL=C 7z x "$1"
+    else
+        echo "ERROR: Neither bsdtar nor 7z found" >&2
+        return 1
+    fi
+}
+
+# fixnames: below the current directory, rename every entry whose name
+# changes when converted from GBK to UTF-8.
+fixnames() {
+    # -depth visits a directory's contents before the directory itself,
+    # so renaming a directory cannot invalidate paths still to be read.
+    # "IFS=" preserves leading/trailing blanks; names containing a
+    # newline cannot pass through this pipe and are not supported.
+    find . -depth | while IFS= read -r p; do
+        dn=$(dirname "${p}")
+        fn=$(basename "${p}")
+        # printf rather than echo: GBK trail bytes may be a backslash,
+        # which some echo implementations treat as an escape.
+        fn2=$(printf '%s\n' "${fn}" | iconv -f gbk -t utf-8 2>/dev/null) || {
+            # iconv's partial output is not a usable rename target.
+            echo "WARNING: not valid GBK, left unchanged: ${p}" >&2
+            continue
+        }
+        if [ -n "${fn2}" ] && [ "${fn}" != "${fn2}" ]; then
+            mv -v "${dn}/${fn}" "${dn}/${fn2}"
+        fi
+    done
+}
+
+case $1 in
+'' | -h | --help)
+    echo "usage: ${0##*/} ZIPFILE"
+    exit 1
+    ;;
+esac
+
+zipfile=$(realpath "$1") || exit 1
+curdir=$(pwd)
+tmpdir=$(mktemp -d) || exit 1
+# Stop if this cd fails: fixnames would otherwise rename files in the
+# caller's own directory.
+cd "${tmpdir}" || exit 1
+
+status=0
+echo "Extracting archive '${zipfile}' ..."
+if ! extract "${zipfile}"; then
+    # Keep going so that whatever was unpacked is still delivered.
+    echo "WARNING: extraction failed or was incomplete" >&2
+    status=1
+fi
+echo "Fixing filenames ..."
+fixnames
+
+cd "${curdir}" || exit 1
+# A bare "*" skips dotfiles and stays literal when nothing matches, so
+# also glob the hidden names and test each candidate before moving it.
+for f in "${tmpdir}"/* "${tmpdir}"/.[!.]* "${tmpdir}"/..?*; do
+    if [ -e "${f}" ] || [ -L "${f}" ]; then
+        mv "${f}" . || status=1
+    fi
+done
+# rmdir only removes an empty directory, so leftovers are never lost.
+rmdir "${tmpdir}" || status=1
+exit "${status}"
-- 
cgit v1.2.2