• he/him

Coder, pun perpetrator
Grumpiness elemental
Hyperbole abuser


Tools programmer
Writer-wannabe
Did translations once upon a time
I contain multitudes


(TurfsterNTE off Twitter)


Trans rights
Black lives matter


Be excellent to each other


UE4/5 Plugins on Itch
nte.itch.io/

nex3
@nex3

A lot of the discussion around the huge automated statistical models that are conventionally called "AI" involves the idea of hypothetical future improvements to the system that will make it concretely useful for things that it's currently not very good at. While undoubtedly it will get better at the sorts of things it can already do, and may even at some point be able to render a hand that doesn't make me want to vomit, there's one important thing that gets glossed over in a lot of conversation because programmers think it's so obvious it doesn't even warrant mentioning and non-programmers may not be aware of it at all.

AI cannot be programmed. AI is not like science fictional depictions where you can just build three laws into it and have it unerringly follow them, and it's not like conventional software where it rigorously follows a minutely precise set of instructions. You cannot simply make the Bing chatbot more accurate by plugging in a big database of verified facts and rules of deduction, because it fundamentally has no idea what truth is. It is a statistical model of what people are likely to say on the internet, and it's so unimaginably huge that even a team of humans couldn't possibly manually correct it except in the most broad strokes imaginable.

You can't even tell it "this is what a statement of fact looks like" because to tell it anything at all, you need an approximately-internet-sized corpus of training data with annotations that accurately indicate that information and that doesn't exist. The only internet-sized corpus is the one they've already used, and it certainly doesn't have sentence-level semantic metadata. So you're stuck: you can push the statistics as hard as you want but they'll never really do what you want because you can never tell them what you want in a language they'll understand.



somewhatnifty
@somewhatnifty

this is because they are wimps. just do it. just do what an mp3 does but like with whatever. i want to shrink my gigabytes of random crap down to arbitrary size and send them fast to my pals. it's all just numbers anyway, right? so long as most of the digits are sorta close i don't see the issue. lossy zip a text file? you can probably make out most of the words. lossy zip a .wav? there'll probably be some sounds in there. you can listen to those. lossy zip an .exe? it might not even crash right away. maybe it'll do something cool. you don't know.


lexi
@lexi

i am not joking you can now compress your files as lossy JPEGs

echo hello world |
  node index --quality 100 --compress - |
  node index --decompress -
hel�mv�qld�
curl https://www.gnu.org/licenses/gpl-3.0.txt |
  node index --quality 100 --compress - |
  node index --decompress - |
  head -c1000
 !□   □   □   □   □GNU□HFO□R@L□OTA□IC□JBE□RD  □   □   □   □!!□!!!□ U□rsi□n1□ 19□Juo□1/□5
□Cnq□rhg□s (□) 1□/6"□rde□Sod□xar□ En□nb`□hnn□Im□.<□ttp□9/.□sf.□sf.□
 D□fr|□ne □s p□rlh□tdc□sn □ooy□amc□dis□rib□te □dqa□tim□dpr□er
□pg □hhr□jic□nre□bnb□mdm□,a□ra□bmj□mg□sg□mn□_l□mve□.

□   □   □   □   □!!□   □   □sd_□bkd□
 □ge□OU#□dmd□bj □ual□d M□cep□ei□ `□qee□ bn□xld□t l□cen□f!f□q        t□fsw□qe□nc □shc□ ji□cs□fw□rjs□
         □She□jic□nre□ en□mn□t r□fsw□qe□mc □shc□op□bth□al□nql□ aq□ de□hfm□b
t□r`□d a□`y!□nus□eqe□cnl□sn □gaq□am□ ch□nfe□shc□xps□s-□Ax□omt□_su□
th□ GN□Fd□dq_□ Pt□kha□Lic□osf□is □nre□fde□ro □u`q□nre□xn□qe□eec□m t□ sg□rf □nc□hbn□d _□ku□rsi□mr!□fa□oro□s_l□,sm□mak□rt□d i□rd□ahn□fq□c
r□fsw□qe□or!□kk□ss□ser□- □c+ □ge□qee□Sof□xar□ Ep□mc`□hpo□ tr□sh□
GN□ Ge□dq_□ Ou□mh`□Lic□nre□eor□nps□ne□nus□snf□xar□;h□an□mgc□ ak□m t`m□or□er□nqj□sdk□ate□ th□r v□xa□ hs□ au□gnr□-"□ou!□am□ppl□ hs□tn
□nus□psp□r`l□-s□n-     □ U□dm □es□d`j□of□qee□snf□xar□, v□ aq□rb□drq□mg□

and this is how the GPL looks:

random noise and a black rectangle

source code

this is licensed under the Whatever The Fuck You Do, Do Not Use This Software Under Any Circumstances License

you need node.js installed, and need to npm install jimp command-line-usage command-line-args. then download the script from here or copy the source code:

const commandLineArgs = require('command-line-args')
const commandLineUsage = require('command-line-usage')
const jimp = require('jimp')
const fs = require('fs')

// Command-line option table. It is passed both to command-line-args (parsing)
// and to command-line-usage (as the `optionList` of the help screen), so the
// `description` / `typeLabel` strings are exactly what the user sees.
const optionDefinitions = [
    {
        name: 'help',
        alias: 'h',
        type: Boolean,
        description: 'Display this usage guide.'
    },
    {
        // The value is the INPUT to read; the result goes to --output.
        name: 'decompress',
        alias: 'd',
        type: String,
        description: 'File to decompress (- for stdin)',
        typeLabel: '<file>'
    },
    {
        // The value is the INPUT to read, not the archive to write "into".
        name: 'compress',
        alias: 'c',
        type: String,
        description: 'File to compress (- for stdin)',
        typeLabel: '<file>'
    },
    {
        // Forwarded to the JPEG encoder; the caller falls back to 100 when omitted.
        name: 'quality',
        alias: 'q',
        type: Number,
        description: 'Lossy JPEG quality, 0-100 (default: 100)',
        typeLabel: '<number>'
    },
    {
        name: 'output',
        alias: 'o',
        type: String,
        description: 'Output file (- or omitted for stdout)',
        typeLabel: '<file>'
    }
]

// Parse argv and dispatch: print help, or read the input (a file, or stdin
// when the path is "-") and hand it to compress() / decompress().
const options = commandLineArgs(optionDefinitions)

if (options.help || (!options.decompress && !options.compress)) {
    // Help was requested, or no action was given: show the usage guide only.
    const usage = commandLineUsage([
        {
            header: 'lostar - Lossy Archiver',
            content: 'A lossy file archiver. Oh god why'
        },
        {
            header: 'Options',
            optionList: optionDefinitions
        },
        {
            content: 'Note: You can pass - for stdin/stdout. Output defaults to stdout.'
        }
    ])
    console.log(usage)
} else if (options.decompress && options.compress) {
    // Previously this silently read the --compress path but ran decompress on it.
    console.error('Pass either --compress or --decompress, not both.')
    process.exitCode = 1
} else {
    // Default to 100 only when no usable number was given. A plain `|| 100`
    // would also replace an explicit quality of 0.
    if (!Number.isFinite(options.quality)) {
        options.quality = 100
    }
    const file = options.compress || options.decompress
    const run = options.decompress ? decompress : compress
    // compress/decompress are async; report failures instead of leaving an
    // unhandled promise rejection.
    const fail = (err) => {
        console.error(err)
        process.exitCode = 1
    }
    if (file === '-') {
        // Collect stdin chunks and concatenate once at the end, rather than
        // re-copying the whole buffer on every 'data' event.
        const chunks = []
        process.stdin.on('data', (chunk) => {
            chunks.push(chunk)
        })
        process.stdin.on('end', () => {
            run(Buffer.concat(chunks), options).catch(fail)
        })
    } else {
        run(fs.readFileSync(file), options).catch(fail)
    }
}

// We store files lossy by storing 3 bytes per pixel, and then we write it out as a JPEG to compress it lossy
// The image is always square, and the rest of the image that does not have any data is filled with 0s (for max compression)
// When decompressing, we just trim the trailing 0s. This is a horrible idea. Don't use this. Also, it's lossy, so it will probably break

/**
 * "Compress" a buffer by storing it as the pixels of a square JPEG.
 *
 * Three data bytes go into the R, G and B channels of each pixel. The alpha
 * channel is never used for data because JPEG cannot store it; it is set to
 * fully opaque instead. Unused pixels stay black (0) so that decompress() can
 * trim them again.
 *
 * @param {Buffer} buffer - Raw bytes to store.
 * @param {{quality: number, output?: string}} options - JPEG quality and the
 *   output path ("-" or missing means stdout).
 * @returns {Promise<void>} Resolves once the JPEG has been written.
 */
async function compress(buffer, options) {
    // Smallest square with at least buffer.length / 3 pixels. Never go below
    // 1x1 so that an empty input still produces a valid image.
    const size = Math.max(1, Math.ceil(Math.sqrt(buffer.length / 3)))
    const image = new jimp(size, size)
    image.quality(options.quality)
    packIntoRgb(buffer, image.bitmap.data)
    const out = await image.getBufferAsync(jimp.MIME_JPEG)
    writeOutput(out, options)
}

/**
 * Reverse compress(): decode the image, read the R, G and B channels of every
 * pixel back into a byte stream and drop the trailing zero padding.
 *
 * The JPEG round trip is lossy, so the result is only an approximation of the
 * original bytes; padding that no longer decodes to exact zeros is kept.
 *
 * @param {Buffer} buffer - JPEG data produced by compress().
 * @param {{output?: string}} options - Output path ("-" or missing means stdout).
 * @returns {Promise<void>} Resolves once the bytes have been written.
 */
async function decompress(buffer, options) {
    const image = await jimp.read(buffer)
    // No quality setting here: nothing is re-encoded while decompressing.
    writeOutput(unpackFromRgb(image.bitmap.data), options)
}

// Spread `data` over the R, G, B channels of the RGBA array `pixels`
// (4 bytes per pixel), zeroing the unused colour bytes and making every pixel opaque.
function packIntoRgb(data, pixels) {
    pixels.fill(0)
    for (let alpha = 3; alpha < pixels.length; alpha += 4) {
        pixels[alpha] = 255
    }
    for (let i = 0; i < data.length; i++) {
        // Byte i lives in pixel floor(i / 3), channel i % 3.
        pixels[Math.floor(i / 3) * 4 + (i % 3)] = data[i]
    }
}

// Collect the R, G, B bytes of the RGBA array `pixels`, skipping alpha, and
// return them without trailing zero bytes.
function unpackFromRgb(pixels) {
    const pixelCount = Math.floor(pixels.length / 4)
    const data = Buffer.alloc(pixelCount * 3)
    for (let p = 0; p < pixelCount; p++) {
        data[p * 3] = pixels[p * 4]
        data[p * 3 + 1] = pixels[p * 4 + 1]
        data[p * 3 + 2] = pixels[p * 4 + 2]
    }
    // `end` is an exclusive bound, so the last non-zero byte is kept. An
    // all-zero image yields an empty buffer.
    let end = data.length
    while (end > 0 && data[end - 1] === 0) {
        end--
    }
    return data.subarray(0, end)
}

// Write `out` to options.output, or to stdout when it is "-" or not set.
function writeOutput(out, options) {
    if (options.output && options.output !== "-") {
        fs.writeFileSync(options.output, out)
    } else {
        process.stdout.write(out)
    }
}

i am so sorry


Turfster
@Turfster

I offer you a violence on this fucking monday.