...

Source file src/archive/zip/struct.go

Documentation: archive/zip

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  /*
     6  Package zip provides support for reading and writing ZIP archives.
     7  
     8  See the [ZIP specification] for details.
     9  
    10  This package does not support disk spanning.
    11  
    12  A note about ZIP64:
    13  
    14  To be backwards compatible the FileHeader has both 32 and 64 bit Size
    15  fields. The 64 bit fields will always contain the correct value and
    16  for normal archives both fields will be the same. For files requiring
    17  the ZIP64 format the 32 bit fields will be 0xffffffff and the 64 bit
    18  fields must be used instead.
    19  
    20  [ZIP specification]: https://support.pkware.com/pkzip/appnote
    21  */
    22  package zip
    23  
    24  import (
    25  	"io/fs"
    26  	"path"
    27  	"time"
    28  )
    29  
// Compression methods.
//
// These are the values stored in the Method field of a FileHeader;
// a zero Method means Store.
const (
	Store   uint16 = 0 // no compression
	Deflate uint16 = 8 // DEFLATE compressed
)
    35  
// Format constants: record signatures, the lengths of the fixed-size
// part of each record, creator system codes, version numbers, size
// limits, and extra header IDs.
const (
	fileHeaderSignature      = 0x04034b50
	directoryHeaderSignature = 0x02014b50
	directoryEndSignature    = 0x06054b50
	directory64LocSignature  = 0x07064b50
	directory64EndSignature  = 0x06064b50
	dataDescriptorSignature  = 0x08074b50 // de-facto standard; required by OS X Finder
	fileHeaderLen            = 30         // + filename + extra
	directoryHeaderLen       = 46         // + filename + extra + comment
	directoryEndLen          = 22         // + comment
	dataDescriptorLen        = 16         // four uint32: descriptor signature, crc32, compressed size, size
	dataDescriptor64Len      = 24         // two uint32: signature, crc32 | two uint64: compressed size, size
	directory64LocLen        = 20         //
	directory64EndLen        = 56         // + extra

	// Constants for the first byte in CreatorVersion.
	// FileHeader.Mode compares them against CreatorVersion >> 8.
	creatorFAT    = 0
	creatorUnix   = 3
	creatorNTFS   = 11
	creatorVFAT   = 14
	creatorMacOSX = 19

	// Version numbers.
	zipVersion20 = 20 // 2.0
	zipVersion45 = 45 // 4.5 (reads and writes zip64 archives)

	// Limits for non zip64 files.
	uint16max = (1 << 16) - 1
	uint32max = (1 << 32) - 1

	// Extra header IDs.
	//
	// IDs 0..31 are reserved for official use by PKWARE.
	// IDs above that range are defined by third-party vendors.
	// Since ZIP lacked high precision timestamps (nor an official specification
	// of the timezone used for the date fields), many competing extra fields
	// have been invented. Pervasive use effectively makes them "official".
	//
	// See http://mdfs.net/Docs/Comp/Archiving/Zip/ExtraField
	zip64ExtraID       = 0x0001 // Zip64 extended information
	ntfsExtraID        = 0x000a // NTFS
	unixExtraID        = 0x000d // UNIX
	extTimeExtraID     = 0x5455 // Extended timestamp
	infoZipUnixExtraID = 0x5855 // Info-ZIP Unix extension
)
    81  
// FileHeader describes a file within a ZIP file.
// See the [ZIP specification] for details.
//
// [ZIP specification]: https://support.pkware.com/pkzip/appnote
type FileHeader struct {
	// Name is the name of the file.
	//
	// It must be a relative path, not start with a drive letter (such as "C:"),
	// and must use forward slashes instead of back slashes. A trailing slash
	// indicates that this file is a directory and should have no data.
	Name string

	// Comment is any arbitrary user-defined string shorter than 64KiB.
	Comment string

	// NonUTF8 indicates that Name and Comment are not encoded in UTF-8.
	//
	// By specification, the only other encoding permitted should be CP-437,
	// but historically many ZIP readers interpret Name and Comment as whatever
	// the system's local character encoding happens to be.
	//
	// This flag should only be set if the user intends to encode a non-portable
	// ZIP file for a specific localized region. Otherwise, the Writer
	// automatically sets the ZIP format's UTF-8 flag for valid UTF-8 strings.
	NonUTF8 bool

	// CreatorVersion records the creator of the entry. Mode inspects
	// CreatorVersion >> 8 to decide how ExternalAttrs is interpreted,
	// and SetMode sets that byte to the Unix creator code.
	CreatorVersion uint16
	// ReaderVersion is not interpreted anywhere in this file.
	// NOTE(review): presumably the ZIP version needed to read the
	// entry (compare zipVersion20 and zipVersion45) — confirm.
	ReaderVersion uint16
	// Flags holds bit flags for the entry. Bit 0x8 marks the presence
	// of a data descriptor (see hasDataDescriptor).
	Flags uint16

	// Method is the compression method. If zero, Store is used.
	Method uint16

	// Modified is the modified time of the file.
	//
	// When reading, an extended timestamp is preferred over the legacy MS-DOS
	// date field, and the offset between the times is used as the timezone.
	// If only the MS-DOS date is present, the timezone is assumed to be UTC.
	//
	// When writing, an extended timestamp (which is timezone-agnostic) is
	// always emitted. The legacy MS-DOS date field is encoded according to the
	// location of the Modified time.
	Modified time.Time

	// ModifiedTime is an MS-DOS-encoded time.
	//
	// Deprecated: Use Modified instead.
	ModifiedTime uint16

	// ModifiedDate is an MS-DOS-encoded date.
	//
	// Deprecated: Use Modified instead.
	ModifiedDate uint16

	// CRC32 is the CRC32 checksum of the file content.
	CRC32 uint32

	// CompressedSize is the compressed size of the file in bytes.
	// If either the uncompressed or compressed size of the file
	// does not fit in 32 bits, CompressedSize is set to ^uint32(0).
	//
	// Deprecated: Use CompressedSize64 instead.
	CompressedSize uint32

	// UncompressedSize is the uncompressed size of the file in bytes.
	// If either the uncompressed or compressed size of the file
	// does not fit in 32 bits, UncompressedSize is set to ^uint32(0).
	//
	// Deprecated: Use UncompressedSize64 instead.
	UncompressedSize uint32

	// CompressedSize64 is the compressed size of the file in bytes.
	CompressedSize64 uint64

	// UncompressedSize64 is the uncompressed size of the file in bytes.
	UncompressedSize64 uint64

	// Extra is not interpreted anywhere in this file.
	// NOTE(review): presumably the raw extra-field bytes, made up of
	// records tagged with the extra header IDs — confirm.
	Extra []byte
	// ExternalAttrs holds the file attributes. For Unix-style creators
	// the mode is in the upper 16 bits; the low bits carry the MS-DOS
	// directory and read-only attributes (see Mode and SetMode).
	ExternalAttrs uint32 // Meaning depends on CreatorVersion
}
   162  
   163  // FileInfo returns an fs.FileInfo for the [FileHeader].
   164  func (h *FileHeader) FileInfo() fs.FileInfo {
   165  	return headerFileInfo{h}
   166  }
   167  
   168  // headerFileInfo implements [fs.FileInfo].
   169  type headerFileInfo struct {
   170  	fh *FileHeader
   171  }
   172  
   173  func (fi headerFileInfo) Name() string { return path.Base(fi.fh.Name) }
   174  func (fi headerFileInfo) Size() int64 {
   175  	if fi.fh.UncompressedSize64 > 0 {
   176  		return int64(fi.fh.UncompressedSize64)
   177  	}
   178  	return int64(fi.fh.UncompressedSize)
   179  }
   180  func (fi headerFileInfo) IsDir() bool { return fi.Mode().IsDir() }
   181  func (fi headerFileInfo) ModTime() time.Time {
   182  	if fi.fh.Modified.IsZero() {
   183  		return fi.fh.ModTime()
   184  	}
   185  	return fi.fh.Modified.UTC()
   186  }
   187  func (fi headerFileInfo) Mode() fs.FileMode { return fi.fh.Mode() }
   188  func (fi headerFileInfo) Type() fs.FileMode { return fi.fh.Mode().Type() }
   189  func (fi headerFileInfo) Sys() any          { return fi.fh }
   190  
   191  func (fi headerFileInfo) Info() (fs.FileInfo, error) { return fi, nil }
   192  
   193  func (fi headerFileInfo) String() string {
   194  	return fs.FormatFileInfo(fi)
   195  }
   196  
   197  // FileInfoHeader creates a partially-populated [FileHeader] from an
   198  // fs.FileInfo.
   199  // Because fs.FileInfo's Name method returns only the base name of
   200  // the file it describes, it may be necessary to modify the Name field
   201  // of the returned header to provide the full path name of the file.
   202  // If compression is desired, callers should set the FileHeader.Method
   203  // field; it is unset by default.
   204  func FileInfoHeader(fi fs.FileInfo) (*FileHeader, error) {
   205  	size := fi.Size()
   206  	fh := &FileHeader{
   207  		Name:               fi.Name(),
   208  		UncompressedSize64: uint64(size),
   209  	}
   210  	fh.SetModTime(fi.ModTime())
   211  	fh.SetMode(fi.Mode())
   212  	if fh.UncompressedSize64 > uint32max {
   213  		fh.UncompressedSize = uint32max
   214  	} else {
   215  		fh.UncompressedSize = uint32(fh.UncompressedSize64)
   216  	}
   217  	return fh, nil
   218  }
   219  
// directoryEnd describes the end of an archive's directory: how many
// records the directory holds, its size and offset, and the trailing
// comment. The disk-related fields are unused.
type directoryEnd struct {
	diskNbr            uint32 // unused
	dirDiskNbr         uint32 // unused
	dirRecordsThisDisk uint64 // unused
	directoryRecords   uint64
	directorySize      uint64
	directoryOffset    uint64 // relative to file
	commentLen         uint16
	comment            string
}
   230  
   231  // timeZone returns a *time.Location based on the provided offset.
   232  // If the offset is non-sensible, then this uses an offset of zero.
   233  func timeZone(offset time.Duration) *time.Location {
   234  	const (
   235  		minOffset   = -12 * time.Hour  // E.g., Baker island at -12:00
   236  		maxOffset   = +14 * time.Hour  // E.g., Line island at +14:00
   237  		offsetAlias = 15 * time.Minute // E.g., Nepal at +5:45
   238  	)
   239  	offset = offset.Round(offsetAlias)
   240  	if offset < minOffset || maxOffset < offset {
   241  		offset = 0
   242  	}
   243  	return time.FixedZone("", int(offset/time.Second))
   244  }
   245  
   246  // msDosTimeToTime converts an MS-DOS date and time into a time.Time.
   247  // The resolution is 2s.
   248  // See: https://learn.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-dosdatetimetofiletime
   249  func msDosTimeToTime(dosDate, dosTime uint16) time.Time {
   250  	return time.Date(
   251  		// date bits 0-4: day of month; 5-8: month; 9-15: years since 1980
   252  		int(dosDate>>9+1980),
   253  		time.Month(dosDate>>5&0xf),
   254  		int(dosDate&0x1f),
   255  
   256  		// time bits 0-4: second/2; 5-10: minute; 11-15: hour
   257  		int(dosTime>>11),
   258  		int(dosTime>>5&0x3f),
   259  		int(dosTime&0x1f*2),
   260  		0, // nanoseconds
   261  
   262  		time.UTC,
   263  	)
   264  }
   265  
   266  // timeToMsDosTime converts a time.Time to an MS-DOS date and time.
   267  // The resolution is 2s.
   268  // See: https://learn.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-filetimetodosdatetime
   269  func timeToMsDosTime(t time.Time) (fDate uint16, fTime uint16) {
   270  	fDate = uint16(t.Day() + int(t.Month())<<5 + (t.Year()-1980)<<9)
   271  	fTime = uint16(t.Second()/2 + t.Minute()<<5 + t.Hour()<<11)
   272  	return
   273  }
   274  
   275  // ModTime returns the modification time in UTC using the legacy
   276  // [ModifiedDate] and [ModifiedTime] fields.
   277  //
   278  // Deprecated: Use [Modified] instead.
   279  func (h *FileHeader) ModTime() time.Time {
   280  	return msDosTimeToTime(h.ModifiedDate, h.ModifiedTime)
   281  }
   282  
   283  // SetModTime sets the [Modified], [ModifiedTime], and [ModifiedDate] fields
   284  // to the given time in UTC.
   285  //
   286  // Deprecated: Use [Modified] instead.
   287  func (h *FileHeader) SetModTime(t time.Time) {
   288  	t = t.UTC() // Convert to UTC for compatibility
   289  	h.Modified = t
   290  	h.ModifiedDate, h.ModifiedTime = timeToMsDosTime(t)
   291  }
   292  
// Mode bits used when converting between fs.FileMode and the
// attributes stored in FileHeader.ExternalAttrs.
const (
	// Unix constants. The specification doesn't mention them,
	// but these seem to be the values agreed on by tools.
	s_IFMT   = 0xf000 // mask selecting the file type bits
	s_IFSOCK = 0xc000 // maps to fs.ModeSocket
	s_IFLNK  = 0xa000 // maps to fs.ModeSymlink
	s_IFREG  = 0x8000 // regular file
	s_IFBLK  = 0x6000 // maps to fs.ModeDevice
	s_IFDIR  = 0x4000 // maps to fs.ModeDir
	s_IFCHR  = 0x2000 // maps to fs.ModeDevice | fs.ModeCharDevice
	s_IFIFO  = 0x1000 // maps to fs.ModeNamedPipe
	s_ISUID  = 0x800  // maps to fs.ModeSetuid
	s_ISGID  = 0x400  // maps to fs.ModeSetgid
	s_ISVTX  = 0x200  // maps to fs.ModeSticky

	// MS-DOS attribute bits, kept in the low bits of ExternalAttrs.
	msdosDir      = 0x10
	msdosReadOnly = 0x01
)
   311  
   312  // Mode returns the permission and mode bits for the [FileHeader].
   313  func (h *FileHeader) Mode() (mode fs.FileMode) {
   314  	switch h.CreatorVersion >> 8 {
   315  	case creatorUnix, creatorMacOSX:
   316  		mode = unixModeToFileMode(h.ExternalAttrs >> 16)
   317  	case creatorNTFS, creatorVFAT, creatorFAT:
   318  		mode = msdosModeToFileMode(h.ExternalAttrs)
   319  	}
   320  	if len(h.Name) > 0 && h.Name[len(h.Name)-1] == '/' {
   321  		mode |= fs.ModeDir
   322  	}
   323  	return mode
   324  }
   325  
   326  // SetMode changes the permission and mode bits for the [FileHeader].
   327  func (h *FileHeader) SetMode(mode fs.FileMode) {
   328  	h.CreatorVersion = h.CreatorVersion&0xff | creatorUnix<<8
   329  	h.ExternalAttrs = fileModeToUnixMode(mode) << 16
   330  
   331  	// set MSDOS attributes too, as the original zip does.
   332  	if mode&fs.ModeDir != 0 {
   333  		h.ExternalAttrs |= msdosDir
   334  	}
   335  	if mode&0200 == 0 {
   336  		h.ExternalAttrs |= msdosReadOnly
   337  	}
   338  }
   339  
   340  // isZip64 reports whether the file size exceeds the 32 bit limit
   341  func (h *FileHeader) isZip64() bool {
   342  	return h.CompressedSize64 >= uint32max || h.UncompressedSize64 >= uint32max
   343  }
   344  
   345  func (h *FileHeader) hasDataDescriptor() bool {
   346  	return h.Flags&0x8 != 0
   347  }
   348  
   349  func msdosModeToFileMode(m uint32) (mode fs.FileMode) {
   350  	if m&msdosDir != 0 {
   351  		mode = fs.ModeDir | 0777
   352  	} else {
   353  		mode = 0666
   354  	}
   355  	if m&msdosReadOnly != 0 {
   356  		mode &^= 0222
   357  	}
   358  	return mode
   359  }
   360  
   361  func fileModeToUnixMode(mode fs.FileMode) uint32 {
   362  	var m uint32
   363  	switch mode & fs.ModeType {
   364  	default:
   365  		m = s_IFREG
   366  	case fs.ModeDir:
   367  		m = s_IFDIR
   368  	case fs.ModeSymlink:
   369  		m = s_IFLNK
   370  	case fs.ModeNamedPipe:
   371  		m = s_IFIFO
   372  	case fs.ModeSocket:
   373  		m = s_IFSOCK
   374  	case fs.ModeDevice:
   375  		m = s_IFBLK
   376  	case fs.ModeDevice | fs.ModeCharDevice:
   377  		m = s_IFCHR
   378  	}
   379  	if mode&fs.ModeSetuid != 0 {
   380  		m |= s_ISUID
   381  	}
   382  	if mode&fs.ModeSetgid != 0 {
   383  		m |= s_ISGID
   384  	}
   385  	if mode&fs.ModeSticky != 0 {
   386  		m |= s_ISVTX
   387  	}
   388  	return m | uint32(mode&0777)
   389  }
   390  
   391  func unixModeToFileMode(m uint32) fs.FileMode {
   392  	mode := fs.FileMode(m & 0777)
   393  	switch m & s_IFMT {
   394  	case s_IFBLK:
   395  		mode |= fs.ModeDevice
   396  	case s_IFCHR:
   397  		mode |= fs.ModeDevice | fs.ModeCharDevice
   398  	case s_IFDIR:
   399  		mode |= fs.ModeDir
   400  	case s_IFIFO:
   401  		mode |= fs.ModeNamedPipe
   402  	case s_IFLNK:
   403  		mode |= fs.ModeSymlink
   404  	case s_IFREG:
   405  		// nothing to do
   406  	case s_IFSOCK:
   407  		mode |= fs.ModeSocket
   408  	}
   409  	if m&s_ISGID != 0 {
   410  		mode |= fs.ModeSetgid
   411  	}
   412  	if m&s_ISUID != 0 {
   413  		mode |= fs.ModeSetuid
   414  	}
   415  	if m&s_ISVTX != 0 {
   416  		mode |= fs.ModeSticky
   417  	}
   418  	return mode
   419  }
   420  

View as plain text