Golang 通过读取文件头，识别文件类型

package main

import (
    "fmt"
    "net/http"
    "os"
)

// main opens the sample file, sniffs its content type via GetFileContentType,
// and prints the result. Any failure along the way aborts with a panic.
func main() {
    const samplePath = "heilong.mp3"

    file, openErr := os.Open(samplePath)
    if openErr != nil {
        panic(openErr)
    }
    // Release the file handle when main returns.
    defer file.Close()

    mimeType, sniffErr := GetFileContentType(file)
    if sniffErr != nil {
        panic(sniffErr)
    }

    report := "Content Type: " + mimeType
    fmt.Println(report)
}

// GetFileContentType sniffs the MIME type of out from its leading bytes.
//
// It issues a single Read of up to 512 bytes at the file's current offset
// (which advances that offset) and hands only the bytes actually read to
// http.DetectContentType. DetectContentType always yields a valid content
// type, falling back to "application/octet-stream" when nothing matches.
//
// Any error from Read is returned unchanged with an empty content type; this
// includes io.EOF when there is nothing left to read (e.g. an empty file).
func GetFileContentType(out *os.File) (string, error) {
    // http.DetectContentType considers at most the first 512 bytes.
    buffer := make([]byte, 512)

    n, err := out.Read(buffer)
    if err != nil {
        return "", err
    }

    // Sniff only buffer[:n]. For files shorter than 512 bytes the rest of the
    // buffer is zero padding, and passing it along would make short text
    // files look like binary data.
    contentType := http.DetectContentType(buffer[:n])

    return contentType, nil
}

文件头映射表

// Data matching the table in section 6.
//
// sniffSignatures is the list of content-type signatures, grouped below as
// HTML/XML/PDF/PostScript markers, UTF BOMs, images, audio and video, fonts,
// archives, WebAssembly, and a trailing textSig entry.
//
// NOTE(review): the element types used here (sniffSig, htmlSig, maskedSig,
// exactSig, mp4Sig, textSig) are not declared in this excerpt; the listing
// looks like the signature table of Go's net/http content sniffer, and
// "section 6" presumably refers to the MIME sniffing spec linked below —
// confirm. Entry order is presumably significant (the comments below ask for
// a prescribed ordering and for textSig to stay last), so do not reorder.
var sniffSignatures = []sniffSig{
    htmlSig("<!DOCTYPE HTML"),
    htmlSig("<HTML"),
    htmlSig("<HEAD"),
    htmlSig("<SCRIPT"),
    htmlSig("<IFRAME"),
    htmlSig("<H1"),
    htmlSig("<DIV"),
    htmlSig("<FONT"),
    htmlSig("<TABLE"),
    htmlSig("<A"),
    htmlSig("<STYLE"),
    htmlSig("<TITLE"),
    htmlSig("<B"),
    htmlSig("<BODY"),
    htmlSig("<BR"),
    htmlSig("<P"),
    htmlSig("<!--"),
    &maskedSig{
        mask:   []byte("\xFF\xFF\xFF\xFF\xFF"),
        pat:    []byte("<?xml"),
        skipWS: true,
        ct:     "text/xml; charset=utf-8"},
    &exactSig{[]byte("%PDF-"), "application/pdf"},
    &exactSig{[]byte("%!PS-Adobe-"), "application/postscript"},

    // UTF BOMs.
    &maskedSig{
        mask: []byte("\xFF\xFF\x00\x00"),
        pat:  []byte("\xFE\xFF\x00\x00"),
        ct:   "text/plain; charset=utf-16be",
    },
    &maskedSig{
        mask: []byte("\xFF\xFF\x00\x00"),
        pat:  []byte("\xFF\xFE\x00\x00"),
        ct:   "text/plain; charset=utf-16le",
    },
    &maskedSig{
        mask: []byte("\xFF\xFF\xFF\x00"),
        pat:  []byte("\xEF\xBB\xBF\x00"),
        ct:   "text/plain; charset=utf-8",
    },

    // Image types
    // For posterity, we originally returned "image/vnd.microsoft.icon" from
    // https://tools.ietf.org/html/draft-ietf-websec-mime-sniff-03#section-7
    // https://codereview.appspot.com/4746042
    // but that has since been replaced with "image/x-icon" in Section 6.2
    // of https://mimesniff.spec.whatwg.org/#matching-an-image-type-pattern
    &exactSig{[]byte("\x00\x00\x01\x00"), "image/x-icon"},
    &exactSig{[]byte("\x00\x00\x02\x00"), "image/x-icon"},
    &exactSig{[]byte("BM"), "image/bmp"},
    &exactSig{[]byte("GIF87a"), "image/gif"},
    &exactSig{[]byte("GIF89a"), "image/gif"},
    &maskedSig{
        mask: []byte("\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF\xFF\xFF"),
        pat:  []byte("RIFF\x00\x00\x00\x00WEBPVP"),
        ct:   "image/webp",
    },
    &exactSig{[]byte("\x89PNG\x0D\x0A\x1A\x0A"), "image/png"},
    &exactSig{[]byte("\xFF\xD8\xFF"), "image/jpeg"},

    // Audio and Video types
    // Enforce the pattern match ordering as prescribed in
    // https://mimesniff.spec.whatwg.org/#matching-an-audio-or-video-type-pattern
    &maskedSig{
        mask: []byte("\xFF\xFF\xFF\xFF"),
        pat:  []byte(".snd"),
        ct:   "audio/basic",
    },
    &maskedSig{
        mask: []byte("\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF"),
        pat:  []byte("FORM\x00\x00\x00\x00AIFF"),
        ct:   "audio/aiff",
    },
    &maskedSig{
        mask: []byte("\xFF\xFF\xFF"),
        pat:  []byte("ID3"),
        ct:   "audio/mpeg",
    },
    &maskedSig{
        mask: []byte("\xFF\xFF\xFF\xFF\xFF"),
        pat:  []byte("OggS\x00"),
        ct:   "application/ogg",
    },
    &maskedSig{
        mask: []byte("\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"),
        pat:  []byte("MThd\x00\x00\x00\x06"),
        ct:   "audio/midi",
    },
    &maskedSig{
        mask: []byte("\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF"),
        pat:  []byte("RIFF\x00\x00\x00\x00AVI "),
        ct:   "video/avi",
    },
    &maskedSig{
        mask: []byte("\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF"),
        pat:  []byte("RIFF\x00\x00\x00\x00WAVE"),
        ct:   "audio/wave",
    },
    // 6.2.0.2. video/mp4
    mp4Sig{},
    // 6.2.0.3. video/webm
    &exactSig{[]byte("\x1A\x45\xDF\xA3"), "video/webm"},

    // Font types
    &maskedSig{
        // 34 NULL bytes followed by the string "LP"
        pat: []byte("\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00LP"),
        // 34 NULL bytes followed by \xFF\xFF
        mask: []byte("\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xFF\xFF"),
        ct:   "application/vnd.ms-fontobject",
    },
    &exactSig{[]byte("\x00\x01\x00\x00"), "font/ttf"},
    &exactSig{[]byte("OTTO"), "font/otf"},
    &exactSig{[]byte("ttcf"), "font/collection"},
    &exactSig{[]byte("wOFF"), "font/woff"},
    &exactSig{[]byte("wOF2"), "font/woff2"},

    // Archive types
    &exactSig{[]byte("\x1F\x8B\x08"), "application/x-gzip"},
    &exactSig{[]byte("PK\x03\x04"), "application/zip"},
    // RAR's signatures are incorrectly defined by the MIME spec as per
    //    https://github.com/whatwg/mimesniff/issues/63
    // However, RAR Labs correctly defines it at:
    //    https://www.rarlab.com/technote.htm#rarsign
    // so we use the definition from RAR Labs.
    // TODO: do whatever the spec ends up doing.
    &exactSig{[]byte("Rar!\x1A\x07\x00"), "application/x-rar-compressed"},     // RAR v1.5-v4.0
    &exactSig{[]byte("Rar!\x1A\x07\x01\x00"), "application/x-rar-compressed"}, // RAR v5+

    // WebAssembly binary: a NUL byte followed by "asm".
    &exactSig{[]byte("\x00\x61\x73\x6D"), "application/wasm"},

    textSig{}, // should be last
}

发表评论